From 10bc4b3fd48dadbe7989eb46d9d84662d0bf720e Mon Sep 17 00:00:00 2001
From: Ben Kurtovic <ben@benkurtovic.com>
Date: Sun, 7 Apr 2024 19:55:25 -0400
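Subject: [PATCH] Python 3.11+ cleanup and bugfixes

Replace uses of the deprecated "imp" module with importlib:

* irc/watcher.py and managers.py now locate modules with
  importlib.machinery.PathFinder.find_spec() and load them with
  importlib.util.module_from_spec() and loader.exec_module().
* lazy.py guards _LazyModule creation with a module-level RLock instead
  of imp.acquire_lock() / imp.release_lock().

Behavior changes:

* Watcher: a missing "rules" module (no spec or no loader) is still
  ignored, but exceptions raised while executing it are no longer
  swallowed.
* Task: the PageNotFoundError branch in tasks/__init__.py now returns
  True instead of False.
* WikiProjectTagger: stop calling decode()/encode() on str values, have
  update_banner() return an "updated" flag that the caller checks
  instead of comparing the banner before and after, and strip
  whitespace from "class" values before matching them.
* Page: edit(), add_section() and _edit() accept extra keyword
  arguments, which are merged into the API edit parameters (a value of
  None removes that key).
* Site: use pymysql instead of oursql for SQL connections.

Also includes formatting-only changes (line wrapping, trailing commas,
blank lines) and copyright year updates.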

---
 CHANGELOG                             |   4 +-
 earwigbot/irc/watcher.py              |  35 ++++---
 earwigbot/lazy.py                     |  15 +--
 earwigbot/managers.py                 |  42 ++++----
 earwigbot/tasks/__init__.py           |   2 +-
 earwigbot/tasks/wikiproject_tagger.py |  88 ++++++++++------
 earwigbot/wiki/page.py                |  19 ++--
 earwigbot/wiki/site.py                | 190 +++++++++++++++++++++++-----------
 earwigbot/wiki/sitesdb.py             |  86 ++++++++++-----
 setup.py                              |  38 +++----
 10 files changed, 335 insertions(+), 184 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index d546b69..ea68d05 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,12 +1,14 @@
 v0.4 (unreleased):
 
-- Migrated to Python 3.
+- Migrated to Python 3 (3.11+).
+- Migrated from oursql to pymysql.
 - Copyvios: Configurable proxy support for specific domains.
 - Copyvios: Parser-directed URL redirection.
 - Copyvios: General parsing improvements.
 - Copyvios: URL exclusion improvements.
 - Copyvios: Removed long-deprecated Yahoo! BOSS search engine.
 - Wiki: Fixed not sending Content-Type header in POST requests.
+- IRC: Moved default server from Freenode to Libera.
 - IRC: Remember joined channels across restarts.
 - IRC: Added !listchans.
 - IRC > !stalk: Added modifiers to change message format or filter messages.
diff --git a/earwigbot/irc/watcher.py b/earwigbot/irc/watcher.py
index b049fef..db85ae4 100644
--- a/earwigbot/irc/watcher.py
+++ b/earwigbot/irc/watcher.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8  -*-
 #
-# Copyright (C) 2009-2015 Ben Kurtovic <ben.kurtovic@gmail.com>
+# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com>
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -20,12 +20,14 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
-import imp
+import importlib.machinery
+import importlib.util
 
 from earwigbot.irc import IRCConnection, RC
 
 __all__ = ["Watcher"]
 
+
 class Watcher(IRCConnection):
     """
     **EarwigBot: IRC Watcher Component**
@@ -40,16 +42,23 @@ class Watcher(IRCConnection):
     def __init__(self, bot):
         self.bot = bot
         cf = bot.config.irc["watcher"]
-        super().__init__(cf["host"], cf["port"], cf["nick"], cf["ident"],
-                         cf["realname"], bot.logger.getChild("watcher"))
+        super().__init__(
+            cf["host"],
+            cf["port"],
+            cf["nick"],
+            cf["ident"],
+            cf["realname"],
+            bot.logger.getChild("watcher"),
+        )
         self._prepare_process_hook()
         self._connect()
 
     def __repr__(self):
         """Return the canonical string representation of the Watcher."""
         res = "Watcher(host={0!r}, port={1!r}, nick={2!r}, ident={3!r}, realname={4!r}, bot={5!r})"
-        return res.format(self.host, self.port, self.nick, self.ident,
-                          self.realname, self.bot)
+        return res.format(
+            self.host, self.port, self.nick, self.ident, self.realname, self.bot
+        )
 
     def __str__(self):
         """Return a nice string representation of the Watcher."""
@@ -88,17 +97,11 @@ class Watcher(IRCConnection):
         self._process_hook = lambda bot, rc: ()
 
         path = self.bot.config.root_dir
-        try:
-            f, path, desc = imp.find_module("rules", [path])
-        except ImportError:
+        spec = importlib.machinery.PathFinder.find_spec("rules", [path])
+        if spec is None or spec.loader is None:
             return
-        try:
-            module = imp.load_module("rules", f, path, desc)
-        except Exception:
-            return
-        finally:
-            f.close()
-
+        module = importlib.util.module_from_spec(spec)
+        spec.loader.exec_module(module)
         self._process_hook_module = module
         try:
             self._process_hook = module.process
diff --git a/earwigbot/lazy.py b/earwigbot/lazy.py
index b6c07b3..e0795bd 100644
--- a/earwigbot/lazy.py
+++ b/earwigbot/lazy.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8  -*-
 #
-# Copyright (C) 2009-2015 Ben Kurtovic <ben.kurtovic@gmail.com>
+# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com>
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -27,7 +27,6 @@ manner, so that they can be referred to by name but are not actually loaded
 until they are used (i.e. their attributes are read or modified).
 """
 
-from imp import acquire_lock, release_lock
 import importlib
 import sys
 from threading import RLock
@@ -36,12 +35,16 @@ from types import ModuleType
 __all__ = ["LazyImporter"]
 
 _real_get = ModuleType.__getattribute__
+_lazy_init_lock = RLock()
+
 
 def _create_failing_get(exc):
     def _fail(self, attr):
         raise exc
+
     return _fail
 
+
 def _mock_get(self, attr):
     with _real_get(self, "_lock"):
         if _real_get(self, "_unloaded"):
@@ -59,14 +62,13 @@ def _mock_get(self, attr):
 
 class _LazyModule(type):
     def __new__(cls, name):
-        acquire_lock()
-        try:
+        with _lazy_init_lock:
             if name not in sys.modules:
                 attributes = {
                     "__name__": name,
                     "__getattribute__": _mock_get,
                     "_unloaded": True,
-                    "_lock": RLock()
+                    "_lock": RLock(),
                 }
                 parents = (ModuleType,)
                 klass = type.__new__(cls, "module", parents, attributes)
@@ -74,8 +76,6 @@ class _LazyModule(type):
                 if "." in name:  # Also ensure the parent exists
                     _LazyModule(name.rsplit(".", 1)[0])
             return sys.modules[name]
-        finally:
-            release_lock()
 
 
 class LazyImporter:
@@ -84,6 +84,7 @@ class LazyImporter:
     This inserts itself into :py:data:`sys.meta_path`, storing a dictionary of
     :py:class:`_LazyModule`\ s (which is added to with :py:meth:`new`).
     """
+
     def __init__(self):
         self._modules = {}
         sys.meta_path.append(self)
diff --git a/earwigbot/managers.py b/earwigbot/managers.py
index 0debfb8..e834ae1 100644
--- a/earwigbot/managers.py
+++ b/earwigbot/managers.py
@@ -1,7 +1,7 @@
 #! /usr/bin/env python
 # -*- coding: utf-8  -*-
 #
-# Copyright (C) 2009-2015 Ben Kurtovic <ben.kurtovic@gmail.com>
+# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com>
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -21,7 +21,8 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
-import imp
+import importlib.machinery
+import importlib.util
 from os import listdir, path
 from re import sub
 from threading import RLock, Thread
@@ -32,6 +33,7 @@ from earwigbot.tasks import Task
 
 __all__ = ["CommandManager", "TaskManager"]
 
+
 class _ResourceManager:
     """
     **EarwigBot: Resource Manager**
@@ -48,6 +50,7 @@ class _ResourceManager:
     :py:meth:`load`, retrieving specific resources via :py:meth:`get`, and
     iterating over all resources via :py:meth:`__iter__`.
     """
+
     def __init__(self, bot, name, base):
         self.bot = bot
         self.logger = bot.logger.getChild(name)
@@ -60,8 +63,9 @@ class _ResourceManager:
     def __repr__(self):
         """Return the canonical string representation of the manager."""
         res = "{0}(bot={1!r}, name={2!r}, base={3!r})"
-        return res.format(self.__class__.__name__, self.bot,
-                          self._resource_name, self._resource_base)
+        return res.format(
+            self.__class__.__name__, self.bot, self._resource_name, self._resource_base
+        )
 
     def __str__(self):
         """Return a nice string representation of the manager."""
@@ -100,22 +104,22 @@ class _ResourceManager:
     def _load_module(self, name, path):
         """Load a specific resource from a module, identified by name and path.
 
-        We'll first try to import it using imp magic, and if that works, make
-        instances of any classes inside that are subclasses of the base
+        We'll first try to import it using importlib magic, and if that works,
+        make instances of any classes inside that are subclasses of the base
         (:py:attr:`self._resource_base <_resource_base>`), add them to the
         resources dictionary with :py:meth:`self._load_resource()
         <_load_resource>`, and finally log the addition. Any problems along
         the way will either be ignored or logged.
         """
-        f, path, desc = imp.find_module(name, [path])
+        spec = importlib.machinery.PathFinder.find_spec(name, [path])
         try:
-            module = imp.load_module(name, f, path, desc)
+            assert spec is not None, "Spec must not be None"
+            assert spec.loader is not None, "Loader must not be None"
+            module = importlib.util.module_from_spec(spec)
+            spec.loader.exec_module(module)
         except Exception:
-            e = "Couldn't load module '{0}' (from {1})"
-            self.logger.exception(e.format(name, path))
+            self.logger.exception(f"Couldn't load module {name!r} (from {path})")
             return
-        finally:
-            f.close()
 
         for obj in vars(module).values():
             if type(obj) is type:
@@ -132,7 +136,7 @@ class _ResourceManager:
                 continue
             if name.startswith("_") or name.startswith("."):
                 continue
-            modname = sub("\.pyc?$", "", name)  # Remove extension
+            modname = sub(r"\.pyc?$", "", name)  # Remove extension
             if modname in processed:
                 continue
             processed.append(modname)
@@ -200,6 +204,7 @@ class CommandManager(_ResourceManager):
     """
     Manages (i.e., loads, reloads, and calls) IRC commands.
     """
+
     def __init__(self, bot):
         super().__init__(bot, "commands", Command)
 
@@ -234,8 +239,7 @@ class CommandManager(_ResourceManager):
 
         for command in self:
             if hook in command.hooks and self._wrap_check(command, data):
-                thread = Thread(target=self._wrap_process,
-                                args=(command, data))
+                thread = Thread(target=self._wrap_process, args=(command, data))
                 start_time = strftime("%b %d %H:%M:%S")
                 thread.name = "irc:{0} ({1})".format(command.name, start_time)
                 thread.daemon = True
@@ -247,6 +251,7 @@ class TaskManager(_ResourceManager):
     """
     Manages (i.e., loads, reloads, schedules, and runs) wiki bot tasks.
     """
+
     def __init__(self, bot):
         super().__init__(bot, "tasks", Task)
 
@@ -292,11 +297,12 @@ class TaskManager(_ResourceManager):
         if not now:
             now = gmtime()
         # Get list of tasks to run this turn:
-        tasks = self.bot.config.schedule(now.tm_min, now.tm_hour, now.tm_mday,
-                                         now.tm_mon, now.tm_wday)
+        tasks = self.bot.config.schedule(
+            now.tm_min, now.tm_hour, now.tm_mday, now.tm_mon, now.tm_wday
+        )
 
         for task in tasks:
-            if isinstance(task, list):          # They've specified kwargs,
+            if isinstance(task, list):  # They've specified kwargs,
                 self.start(task[0], **task[1])  # so pass those to start
             else:  # Otherwise, just pass task_name
                 self.start(task)
diff --git a/earwigbot/tasks/__init__.py b/earwigbot/tasks/__init__.py
index 3f10846..3b6c37e 100644
--- a/earwigbot/tasks/__init__.py
+++ b/earwigbot/tasks/__init__.py
@@ -146,7 +146,7 @@ class Task:
         try:
             content = page.get()
         except exceptions.PageNotFoundError:
-            return False
+            return True
         if content == cfg.get("disabled", "run"):
             return False
 
diff --git a/earwigbot/tasks/wikiproject_tagger.py b/earwigbot/tasks/wikiproject_tagger.py
index 9e67cac..4c0649e 100644
--- a/earwigbot/tasks/wikiproject_tagger.py
+++ b/earwigbot/tasks/wikiproject_tagger.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8  -*-
 #
-# Copyright (C) 2009-2017 Ben Kurtovic <ben.kurtovic@gmail.com>
+# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com>
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -26,6 +26,7 @@ from earwigbot import exceptions
 from earwigbot.tasks import Task
 from earwigbot.wiki import constants
 
+
 class WikiProjectTagger(Task):
     """A task to tag talk pages with WikiProject banners.
 
@@ -76,28 +77,24 @@ class WikiProjectTagger(Task):
         edited
 
     """
+
     name = "wikiproject_tagger"
 
     # Regexes for template names that should always go above the banner, based
     # on [[Wikipedia:Talk page layout]]:
     TOP_TEMPS = [
         r"skip ?to ?(toc|talk|toctalk)$",
-
         r"ga ?nominee$",
-
         r"(user ?)?talk ?(header|page|page ?header)$",
-
         r"community ?article ?probation$",
         r"censor(-nudity)?$",
         r"blp(o| ?others?)?$",
         r"controvers(ial2?|y)$",
-
         r"(not ?(a ?)?)?forum$",
         r"tv(episode|series)talk$",
         r"recurring ?themes$",
         r"faq$",
         r"(round ?in ?)?circ(les|ular)$",
-
         r"ar(ti|it)cle ?(history|milestones)$",
         r"failed ?ga$",
         r"old ?prod( ?full)?$",
@@ -144,10 +141,18 @@ class WikiProjectTagger(Task):
         else:
             only_with = None
 
-        job = _Job(banner=banner, names=names, summary=summary, update=update,
-                   append=append, autoassess=autoassess, only_with=only_with,
-                   nocreate=nocreate, tag_categories=tag_categories,
-                   dry_run=dry_run)
+        job = _Job(
+            banner=banner,
+            names=names,
+            summary=summary,
+            update=update,
+            append=append,
+            autoassess=autoassess,
+            only_with=only_with,
+            nocreate=nocreate,
+            tag_categories=tag_categories,
+            dry_run=dry_run,
+        )
 
         try:
             self.run_job(kwargs, site, job, recursive)
@@ -165,7 +170,6 @@ class WikiProjectTagger(Task):
             with open(kwargs["file"], "r") as fileobj:
                 for line in fileobj:
                     if line.strip():
-                        line = line.decode("utf8")
                         if line.startswith("[[") and line.endswith("]]"):
                             line = line[2:-2]
                         page = site.get_page(line)
@@ -201,8 +205,13 @@ class WikiProjectTagger(Task):
             return banner, None
 
         names = {banner, title}
-        result = site.api_query(action="query", list="backlinks", bllimit=500,
-                                blfilterredir="redirects", bltitle=title)
+        result = site.api_query(
+            action="query",
+            list="backlinks",
+            bllimit=500,
+            blfilterredir="redirects",
+            bltitle=title,
+        )
         for backlink in result["query"]["backlinks"]:
             names.add(backlink["title"])
             if backlink["ns"] == constants.NS_TEMPLATE:
@@ -215,8 +224,9 @@ class WikiProjectTagger(Task):
     def process_category(self, page, job, recursive):
         """Try to tag all pages in the given category."""
         if page.title in job.processed_cats:
-            self.logger.debug("Skipping category, already processed: [[%s]]",
-                              page.title)
+            self.logger.debug(
+                "Skipping category, already processed: [[%s]]", page.title
+            )
             return
         self.logger.info("Processing category: [[%s]]", page.title)
         job.processed_cats.add(page.title)
@@ -243,8 +253,7 @@ class WikiProjectTagger(Task):
             page = page.toggle_talk()
 
         if page.title in job.processed_pages:
-            self.logger.debug("Skipping page, already processed: [[%s]]",
-                              page.title)
+            self.logger.debug("Skipping page, already processed: [[%s]]", page.title)
             return
         job.processed_pages.add(page.title)
 
@@ -275,21 +284,22 @@ class WikiProjectTagger(Task):
                     return
 
         if job.only_with:
-            if not any(template.name.matches(job.only_with)
-                       for template in code.ifilter_templates(recursive=True)):
+            if not any(
+                template.name.matches(job.only_with)
+                for template in code.ifilter_templates(recursive=True)
+            ):
                 log = "Skipping page: [[%s]]; fails only-with condition"
                 self.logger.info(log, page.title)
                 return
 
         if is_update:
-            old_banner = str(banner)
-            self.update_banner(banner, job, code)
-            if banner == old_banner:
+            updated = self.update_banner(banner, job, code)
+            if not updated:
                 log = "Skipping page: [[%s]]; already tagged and no updates"
                 self.logger.info(log, page.title)
                 return
             self.logger.info("Updating banner on page: [[%s]]", page.title)
-            banner = banner.encode("utf8")
+            banner = str(banner)
         else:
             self.logger.info("Tagging page: [[%s]]", page.title)
             banner = self.make_banner(job, code)
@@ -334,9 +344,11 @@ class WikiProjectTagger(Task):
 
     def update_banner(self, banner, job, code):
         """Update an existing *banner* based on a *job* and a page's *code*."""
-        has = lambda key: (banner.has(key) and
-                           banner.get(key).value.strip() not in ("", "?"))
+        has = lambda key: (
+            banner.has(key) and banner.get(key).value.strip() not in ("", "?")
+        )
 
+        updated = False
         if job.autoassess is not False:
             if not has("class"):
                 assess, reason = self.get_autoassessment(code, job.autoassess)
@@ -349,6 +361,8 @@ class WikiProjectTagger(Task):
                 key, value = param.split("=", 1)
                 if not has(key):
                     banner.add(key, value)
+                    updated = True
+        return updated
 
     def get_autoassessment(self, code, only_classes=None):
         """Get an autoassessment for a page.
@@ -356,16 +370,27 @@ class WikiProjectTagger(Task):
         Return (assessed class as a string or None, assessment reason or None).
         """
         if only_classes is None or only_classes is True:
-            classnames = ["a", "b", "book", "c", "dab", "fa", "fl", "ga",
-                          "list", "redirect", "start", "stub"]
+            classnames = [
+                "a",
+                "b",
+                "book",
+                "c",
+                "dab",
+                "fa",
+                "fl",
+                "ga",
+                "list",
+                "redirect",
+                "start",
+                "stub",
+            ]
         else:
-            classnames = [klass.strip().lower()
-                          for klass in only_classes.split(",")]
+            classnames = [klass.strip().lower() for klass in only_classes.split(",")]
 
         classes = {klass: 0 for klass in classnames}
         for template in code.ifilter_templates(recursive=True):
             if template.has("class"):
-                value = str(template.get("class").value).lower()
+                value = str(template.get("class").value).strip().lower()
                 if value in classes:
                     classes[value] += 1
 
@@ -429,6 +454,7 @@ class WikiProjectTagger(Task):
             self.logger.debug("Inserting banner at beginning")
             code.insert(0, banner + "\n")
 
+
 class _Job:
     """Represents a single wikiproject-tagging task.
 
@@ -436,6 +462,7 @@ class _Job:
     or not to autoassess and create new pages from scratch, and a counter of
     the number of pages edited.
     """
+
     def __init__(self, **kwargs):
         self.banner = kwargs["banner"]
         self.names = kwargs["names"]
@@ -456,4 +483,5 @@ class _Job:
 class _ShutoffEnabled(Exception):
     """Raised by process_page() if shutoff is enabled. Caught by run(), which
     will then stop the task."""
+
     pass
diff --git a/earwigbot/wiki/page.py b/earwigbot/wiki/page.py
index 43d18d3..aa61e44 100644
--- a/earwigbot/wiki/page.py
+++ b/earwigbot/wiki/page.py
@@ -280,7 +280,7 @@ class Page(CopyvioMixIn):
             self._assert_existence()
 
     def _edit(self, params=None, text=None, summary=None, minor=None, bot=None,
-              force=None, section=None, captcha_id=None, captcha_word=None):
+              force=None, section=None, captcha_id=None, captcha_word=None, **kwargs):
         """Edit the page!
 
         If *params* is given, we'll use it as our API query parameters.
@@ -297,7 +297,7 @@ class Page(CopyvioMixIn):
         # Build our API query string:
         if not params:
             params = self._build_edit_params(text, summary, minor, bot, force,
-                                             section, captcha_id, captcha_word)
+                                             section, captcha_id, captcha_word, kwargs)
         else: # Make sure we have the right token:
             params["token"] = self.site.get_token()
 
@@ -320,7 +320,7 @@ class Page(CopyvioMixIn):
         raise exceptions.EditError(result["edit"])
 
     def _build_edit_params(self, text, summary, minor, bot, force, section,
-                           captcha_id, captcha_word):
+                           captcha_id, captcha_word, kwargs):
         """Given some keyword arguments, build an API edit query string."""
         unitxt = text.encode("utf8") if isinstance(text, str) else text
         hashed = md5(unitxt).hexdigest()  # Checksum to ensure text is correct
@@ -351,6 +351,11 @@ class Page(CopyvioMixIn):
         else:
             params["recreate"] = "true"
 
+        for key, val in kwargs.items():
+            if val is None:
+                params.pop(key, None)
+            else:
+                params[key] = val
         return params
 
     def _handle_edit_errors(self, error, params, retry=True):
@@ -657,7 +662,7 @@ class Page(CopyvioMixIn):
         """
         return mwparserfromhell.parse(self.get())
 
-    def edit(self, text, summary, minor=False, bot=True, force=False):
+    def edit(self, text, summary, minor=False, bot=True, force=False, **kwargs):
         """Replace the page's content or creates a new page.
 
         *text* is the new page content, with *summary* as the edit summary.
@@ -670,9 +675,9 @@ class Page(CopyvioMixIn):
         editing our page. Be careful with this!
         """
         self._edit(text=text, summary=summary, minor=minor, bot=bot,
-                   force=force)
+                   force=force, **kwargs)
 
-    def add_section(self, text, title, minor=False, bot=True, force=False):
+    def add_section(self, text, title, minor=False, bot=True, force=False, **kwargs):
         """Add a new section to the bottom of the page.
 
         The arguments for this are the same as those for :py:meth:`edit`, but
@@ -683,7 +688,7 @@ class Page(CopyvioMixIn):
         new section as content.
         """
         self._edit(text=text, summary=title, minor=minor, bot=bot, force=force,
-                   section="new")
+                   section="new", **kwargs)
 
     def check_exclusion(self, username=None, optouts=None):
         """Check whether or not we are allowed to edit the page.
diff --git a/earwigbot/wiki/site.py b/earwigbot/wiki/site.py
index 60b6f08..274be89 100644
--- a/earwigbot/wiki/site.py
+++ b/earwigbot/wiki/site.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8  -*-
 #
-# Copyright (C) 2009-2021 Ben Kurtovic <ben.kurtovic@gmail.com>
+# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com>
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -37,10 +37,11 @@ from earwigbot.wiki.category import Category
 from earwigbot.wiki.page import Page
 from earwigbot.wiki.user import User
 
-oursql = importer.new("oursql")
+pymysql = importer.new("pymysql")
 
 __all__ = ["Site"]
 
+
 class Site:
     """
     **EarwigBot: Wiki Toolset: Site**
@@ -80,18 +81,41 @@ class Site:
     - :py:meth:`get_user`:             returns a User object for the given name
     - :py:meth:`delegate`:             controls when the API or SQL is used
     """
+
     SERVICE_API = 1
     SERVICE_SQL = 2
-    SPECIAL_TOKENS = ["createaccount", "deleteglobalaccount", "login",
-                      "patrol", "rollback", "setglobalaccountstatus",
-                      "userrights", "watch"]
-
-    def __init__(self, name=None, project=None, lang=None, base_url=None,
-                 article_path=None, script_path=None, sql=None,
-                 namespaces=None, login=(None, None), oauth=None,
-                 cookiejar=None, user_agent=None, use_https=True,
-                 assert_edit=None, maxlag=None, wait_between_queries=1,
-                 logger=None, search_config=None):
+    SPECIAL_TOKENS = [
+        "createaccount",
+        "deleteglobalaccount",
+        "login",
+        "patrol",
+        "rollback",
+        "setglobalaccountstatus",
+        "userrights",
+        "watch",
+    ]
+
+    def __init__(
+        self,
+        name=None,
+        project=None,
+        lang=None,
+        base_url=None,
+        article_path=None,
+        script_path=None,
+        sql=None,
+        namespaces=None,
+        login=(None, None),
+        oauth=None,
+        cookiejar=None,
+        user_agent=None,
+        use_https=True,
+        assert_edit=None,
+        maxlag=None,
+        wait_between_queries=1,
+        logger=None,
+        search_config=None,
+    ):
         """Constructor for new Site instances.
 
         This probably isn't necessary to call yourself unless you're building a
@@ -160,8 +184,11 @@ class Site:
         self._session.headers["User-Agent"] = user_agent
         if oauth:
             self._session.auth = OAuth1(
-                oauth["consumer_token"], oauth["consumer_secret"],
-                oauth["access_token"], oauth["access_secret"])
+                oauth["consumer_token"],
+                oauth["consumer_secret"],
+                oauth["access_token"],
+                oauth["access_secret"],
+            )
 
         # Set up our internal logger:
         if logger:
@@ -182,13 +209,24 @@ class Site:
 
     def __repr__(self):
         """Return the canonical string representation of the Site."""
-        res = ", ".join((
-            "Site(name={_name!r}", "project={_project!r}", "lang={_lang!r}",
-            "base_url={_base_url!r}", "article_path={_article_path!r}",
-            "script_path={_script_path!r}", "use_https={_use_https!r}",
-            "assert_edit={_assert_edit!r}", "maxlag={_maxlag!r}",
-            "sql={_sql_data!r}", "login={0}", "oauth={1}", "user_agent={3!r}",
-            "cookiejar={2})"))
+        res = ", ".join(
+            (
+                "Site(name={_name!r}",
+                "project={_project!r}",
+                "lang={_lang!r}",
+                "base_url={_base_url!r}",
+                "article_path={_article_path!r}",
+                "script_path={_script_path!r}",
+                "use_https={_use_https!r}",
+                "assert_edit={_assert_edit!r}",
+                "maxlag={_maxlag!r}",
+                "sql={_sql_data!r}",
+                "login={0}",
+                "oauth={1}",
+                "user_agent={3!r}",
+                "cookiejar={2})",
+            )
+        )
         name, password = self._login_info
         login = "({0}, {1})".format(repr(name), "hidden" if password else None)
         oauth = "hidden" if self._oauth else None
@@ -211,8 +249,15 @@ class Site:
             return value
         return str(value, encoding)
 
-    def _api_query(self, params, tries=0, wait=5, ignore_maxlag=False,
-                   no_assert=False, ae_retry=True):
+    def _api_query(
+        self,
+        params,
+        tries=0,
+        wait=5,
+        ignore_maxlag=False,
+        no_assert=False,
+        ae_retry=True,
+    ):
         """Do an API query with *params* as a dict of parameters.
 
         See the documentation for :py:meth:`api_query` for full implementation
@@ -348,8 +393,14 @@ class Site:
         """
         # All attributes to be loaded, except _namespaces, which is a special
         # case because it requires additional params in the API query:
-        attrs = [self._name, self._project, self._lang, self._base_url,
-                 self._article_path, self._script_path]
+        attrs = [
+            self._name,
+            self._project,
+            self._lang,
+            self._base_url,
+            self._article_path,
+            self._script_path,
+        ]
 
         params = {"action": "query", "meta": "siteinfo", "siprop": "general"}
 
@@ -359,7 +410,7 @@ class Site:
                 result = self._api_query(params, no_assert=True)
             self._load_namespaces(result)
         elif all(attrs):  # Everything is already specified and we're not told
-            return        # to force a reload, so do nothing
+            return  # to force a reload, so do nothing
         else:  # We're only loading attributes other than _namespaces
             with self._api_lock:
                 result = self._api_query(params, no_assert=True)
@@ -424,11 +475,11 @@ class Site:
         (for that, we'd do self._login_info[0]), but rather to get our current
         username without an unnecessary ?action=query&meta=userinfo API query.
         """
-        name = ''.join((self._name, "Token"))
+        name = "".join((self._name, "Token"))
         cookie = self._get_cookie(name, self.domain)
 
         if cookie:
-            name = ''.join((self._name, "UserName"))
+            name = "".join((self._name, "UserName"))
             user_name = self._get_cookie(name, self.domain)
             if user_name:
                 return unquote_plus(user_name.value)
@@ -528,8 +579,12 @@ class Site:
         except KeyError:
             raise exceptions.LoginError("Couldn't get login token")
 
-        params = {"action": "login", "lgname": name, "lgpassword": password,
-                  "lgtoken": token}
+        params = {
+            "action": "login",
+            "lgname": name,
+            "lgpassword": password,
+            "lgtoken": token,
+        }
         with self._api_lock:
             result = self._api_query(params, no_assert=True)
 
@@ -564,18 +619,22 @@ class Site:
     def _sql_connect(self, **kwargs):
         """Attempt to establish a connection with this site's SQL database.
 
-        oursql.connect() will be called with self._sql_data as its kwargs.
+        pymysql.connect() will be called with self._sql_data as its kwargs.
         Any kwargs given to this function will be passed to connect() and will
         have precedence over the config file.
 
-        Will raise SQLError() if the module "oursql" is not available. oursql
-        may raise its own exceptions (e.g. oursql.InterfaceError) if it cannot
+        Will raise SQLError() if the module "pymysql" is not available. pymysql
+        may raise its own exceptions (e.g. pymysql.InterfaceError) if it cannot
         establish a connection.
         """
         args = self._sql_data
         for key, value in kwargs.items():
             args[key] = value
-        if "read_default_file" not in args and "user" not in args and "passwd" not in args:
+        if (
+            "read_default_file" not in args
+            and "user" not in args
+            and "passwd" not in args
+        ):
             args["read_default_file"] = expanduser("~/.my.cnf")
         elif "read_default_file" in args:
             args["read_default_file"] = expanduser(args["read_default_file"])
@@ -585,9 +644,9 @@ class Site:
             args["autoreconnect"] = True
 
         try:
-            self._sql_conn = oursql.connect(**args)
+            self._sql_conn = pymysql.connect(**args)
         except ImportError:
-            e = "SQL querying requires the 'oursql' package: https://pythonhosted.org/oursql/"
+            e = "SQL querying requires the 'pymysql' package: https://pymysql.readthedocs.io/"
             raise exceptions.SQLError(e)
 
     def _get_service_order(self):
@@ -608,8 +667,11 @@ class Site:
         if now - self._sql_info_cache["lastcheck"] > 120:
             self._sql_info_cache["lastcheck"] = now
             try:
-                self._sql_info_cache["replag"] = sqllag = self.get_replag()
-            except (exceptions.SQLError, oursql.Error):
+                try:
+                    self._sql_info_cache["replag"] = sqllag = self.get_replag()
+                except pymysql.Error as exc:
+                    raise exceptions.SQLError(str(exc))
+            except (exceptions.SQLError, ImportError):
                 self._sql_info_cache["usable"] = False
                 return [self.SERVICE_API]
             self._sql_info_cache["usable"] = True
@@ -705,24 +767,31 @@ class Site:
         with self._api_lock:
             return self._api_query(kwargs)
 
-    def sql_query(self, query, params=(), plain_query=False, dict_cursor=False,
-                  cursor_class=None, show_table=False, buffsize=1024):
+    def sql_query(
+        self,
+        query,
+        params=(),
+        plain_query=False,
+        dict_cursor=False,
+        cursor_class=None,
+        buffsize=1024,
+    ):
         """Do an SQL query and yield its results.
 
         If *plain_query* is ``True``, we will force an unparameterized query.
         Specifying both *params* and *plain_query* will cause an error. If
-        *dict_cursor* is ``True``, we will use :py:class:`oursql.DictCursor` as
-        our cursor, otherwise the default :py:class:`oursql.Cursor`. If
-        *cursor_class* is given, it will override this option. If *show_table*
-        is True, the name of the table will be prepended to the name of the
-        column. This will mainly affect an :py:class:`~oursql.DictCursor`.
+        *dict_cursor* is ``True``, we will use
+        :py:class:`pymysql.cursors.DictCursor` as our cursor, otherwise the
+        default :py:class:`pymysql.cursors.Cursor`. If *cursor_class* is given,
+        it will override this option.
 
         *buffsize* is the size of each memory-buffered group of results, to
         reduce the number of conversations with the database; it is passed to
-        :py:meth:`cursor.fetchmany() <oursql.Cursor.fetchmany>`. If set to
-        ``0```, all results will be buffered in memory at once (this uses
-        :py:meth:`fetchall() <oursql.Cursor.fetchall>`). If set to ``1``, it is
-        equivalent to using :py:meth:`fetchone() <oursql.Cursor.fetchone>`.
+        :py:meth:`cursor.fetchmany() <pymysql.cursors.Cursor.fetchmany>`. If
+        set to ``0``, all results will be buffered in memory at once (this
+        uses :py:meth:`fetchall() <pymysql.cursors.Cursor.fetchall>`). If set
+        to ``1``, it is equivalent to using
+        :py:meth:`fetchone() <pymysql.cursors.Cursor.fetchone>`.
 
         Example usage::
 
@@ -736,25 +805,25 @@ class Site:
             {'user_id': 7418060L, 'user_registration': '20080703215134'}
 
         This may raise :py:exc:`~earwigbot.exceptions.SQLError` or one of
-        oursql's exceptions (:py:exc:`oursql.ProgrammingError`,
-        :py:exc:`oursql.InterfaceError`, ...) if there were problems with the
+        pymysql's exceptions (:py:exc:`pymysql.ProgrammingError`,
+        :py:exc:`pymysql.InterfaceError`, ...) if there were problems with the
         query.
 
         See :py:meth:`_sql_connect` for information on how a connection is
-        acquired. Also relevant is `oursql's documentation
-        <https://pythonhosted.org/oursql/>`_ for details on that package.
+        acquired. Also relevant is `pymysql's documentation
+        <https://pymysql.readthedocs.io/>`_ for details on that package.
         """
         if not cursor_class:
             if dict_cursor:
-                cursor_class = oursql.DictCursor
+                cursor_class = pymysql.cursors.DictCursor
             else:
-                cursor_class = oursql.Cursor
+                cursor_class = pymysql.cursors.Cursor
         klass = cursor_class
 
         with self._sql_lock:
             if not self._sql_conn:
                 self._sql_connect()
-            with self._sql_conn.cursor(klass, show_table=show_table) as cur:
+            with self._sql_conn.cursor(klass) as cur:
                 cur.execute(query, params, plain_query)
                 if buffsize:
                     while True:
@@ -798,8 +867,8 @@ class Site:
         time from the timestamp of the latest recent changes event.
 
         This may raise :py:exc:`~earwigbot.exceptions.SQLError` or one of
-        oursql's exceptions (:py:exc:`oursql.ProgrammingError`,
-        :py:exc:`oursql.InterfaceError`, ...) if there were problems.
+        pymysql's exceptions (:py:exc:`pymysql.ProgrammingError`,
+        :py:exc:`pymysql.InterfaceError`, ...) if there were problems.
         """
         query = """SELECT UNIX_TIMESTAMP() - UNIX_TIMESTAMP(rc_timestamp) FROM
                    recentchanges ORDER BY rc_timestamp DESC LIMIT 1"""
@@ -886,8 +955,7 @@ class Site:
         prefix = title.split(":", 1)[0]
         if prefix != title:  # Avoid a page that is simply "Category"
             if prefix in prefixes:
-                return Category(self, title, follow_redirects, pageid,
-                                self._logger)
+                return Category(self, title, follow_redirects, pageid, self._logger)
         return Page(self, title, follow_redirects, pageid, self._logger)
 
     def get_category(self, catname, follow_redirects=False, pageid=None):
@@ -899,7 +967,7 @@ class Site:
         """
         catname = self._unicodeify(catname)
         prefix = self.namespace_id_to_name(constants.NS_CATEGORY)
-        pagename = ':'.join((prefix, catname))
+        pagename = ":".join((prefix, catname))
         return Category(self, pagename, follow_redirects, pageid, self._logger)
 
     def get_user(self, username=None):
diff --git a/earwigbot/wiki/sitesdb.py b/earwigbot/wiki/sitesdb.py
index a01ef64..aff98b5 100644
--- a/earwigbot/wiki/sitesdb.py
+++ b/earwigbot/wiki/sitesdb.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8  -*-
 #
-# Copyright (C) 2009-2021 Ben Kurtovic <ben.kurtovic@gmail.com>
+# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com>
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -35,6 +35,7 @@ from earwigbot.wiki.site import Site
 
 __all__ = ["SitesDB"]
 
+
 class SitesDB:
     """
     **EarwigBot: Wiki Toolset: Sites Database Manager**
@@ -106,7 +107,7 @@ class SitesDB:
                 # Create the file and restrict reading/writing only to the
                 # owner, so others can't peak at our cookies:
                 open(self._cookie_file, "w").close()
-                chmod(self._cookie_file, stat.S_IRUSR|stat.S_IWUSR)
+                chmod(self._cookie_file, stat.S_IRUSR | stat.S_IWUSR)
             else:
                 raise
 
@@ -172,8 +173,16 @@ class SitesDB:
             except KeyError:
                 namespaces[ns_id] = [ns_name]
 
-        return (name, project, lang, base_url, article_path, script_path, sql,
-                namespaces)
+        return (
+            name,
+            project,
+            lang,
+            base_url,
+            article_path,
+            script_path,
+            sql,
+            namespaces,
+        )
 
     def _make_site_object(self, name):
         """Return a Site object associated with the site *name* in our sitesdb.
@@ -182,8 +191,9 @@ class SitesDB:
         raised if the site is not in our sitesdb.
         """
         cookiejar = self._get_cookiejar()
-        (name, project, lang, base_url, article_path, script_path, sql,
-         namespaces) = self._load_site_from_sitesdb(name)
+        (name, project, lang, base_url, article_path, script_path, sql, namespaces) = (
+            self._load_site_from_sitesdb(name)
+        )
 
         config = self.config
         login = (config.wiki.get("username"), config.wiki.get("password"))
@@ -211,13 +221,26 @@ class SitesDB:
                 if isinstance(value, str) and "$1" in value:
                     sql[key] = value.replace("$1", name)
 
-        return Site(name=name, project=project, lang=lang, base_url=base_url,
-                    article_path=article_path, script_path=script_path,
-                    sql=sql, namespaces=namespaces, login=login, oauth=oauth,
-                    cookiejar=cookiejar, user_agent=user_agent,
-                    use_https=use_https, assert_edit=assert_edit,
-                    maxlag=maxlag, wait_between_queries=wait_between_queries,
-                    logger=logger, search_config=search_config)
+        return Site(
+            name=name,
+            project=project,
+            lang=lang,
+            base_url=base_url,
+            article_path=article_path,
+            script_path=script_path,
+            sql=sql,
+            namespaces=namespaces,
+            login=login,
+            oauth=oauth,
+            cookiejar=cookiejar,
+            user_agent=user_agent,
+            use_https=use_https,
+            assert_edit=assert_edit,
+            maxlag=maxlag,
+            wait_between_queries=wait_between_queries,
+            logger=logger,
+            search_config=search_config,
+        )
 
     def _get_site_name_from_sitesdb(self, project, lang):
         """Return the name of the first site with the given project and lang.
@@ -255,8 +278,14 @@ class SitesDB:
         database. If the sitesdb doesn't exist, we'll create it first.
         """
         name = site.name
-        sites_data = (name, site.project, site.lang, site._base_url,
-                      site._article_path, site._script_path)
+        sites_data = (
+            name,
+            site.project,
+            site.lang,
+            site._base_url,
+            site._article_path,
+            site._script_path,
+        )
         sql_data = [(name, key, val) for key, val in site._sql_data.items()]
         ns_data = []
         for ns_id, ns_names in site._namespaces.items():
@@ -353,8 +382,9 @@ class SitesDB:
         e = "Site '{0}:{1}' not found in the sitesdb.".format(project, lang)
         raise SiteNotFoundError(e)
 
-    def add_site(self, project=None, lang=None, base_url=None,
-                 script_path="/w", sql=None):
+    def add_site(
+        self, project=None, lang=None, base_url=None, script_path="/w", sql=None
+    ):
         """Add a site to the sitesdb so it can be retrieved with get_site().
 
         If only a project and a lang are given, we'll guess the *base_url* as
@@ -368,8 +398,8 @@ class SitesDB:
         your wiki is different, provide the script_path as an argument. SQL
         connection settings are guessed automatically using config's template
         value. If this is wrong or not specified, provide a dict of kwargs as
-        *sql* and Site will pass it to :py:func:`oursql.connect(**sql)
-        <oursql.connect>`, allowing you to make queries with
+        *sql* and Site will pass it to :py:func:`pymysql.connect(**sql)
+        <pymysql.connect>`, allowing you to make queries with
         :py:meth:`site.sql_query <earwigbot.wiki.site.Site.sql_query>`.
 
         Returns ``True`` if the site was added successfully or ``False`` if the
@@ -399,11 +429,19 @@ class SitesDB:
             user_agent = user_agent.replace("$2", python_version())
 
         # Create a Site object to log in and load the other attributes:
-        site = Site(base_url=base_url, script_path=script_path, sql=sql,
-                    login=login, oauth=oauth, cookiejar=cookiejar,
-                    user_agent=user_agent, use_https=use_https,
-                    assert_edit=assert_edit, maxlag=maxlag,
-                    wait_between_queries=wait_between_queries)
+        site = Site(
+            base_url=base_url,
+            script_path=script_path,
+            sql=sql,
+            login=login,
+            oauth=oauth,
+            cookiejar=cookiejar,
+            user_agent=user_agent,
+            use_https=use_https,
+            assert_edit=assert_edit,
+            maxlag=maxlag,
+            wait_between_queries=wait_between_queries,
+        )
 
         self._logger.info("Added site '{0}'".format(site.name))
         self._add_site_to_sitesdb(site)
diff --git a/setup.py b/setup.py
index a928353..9feb5c6 100644
--- a/setup.py
+++ b/setup.py
@@ -1,7 +1,7 @@
 #! /usr/bin/env python
 # -*- coding: utf-8  -*-
 #
-# Copyright (C) 2009-2021 Ben Kurtovic <ben.kurtovic@gmail.com>
+# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com>
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -37,11 +37,11 @@ extra_deps = {
         "cryptography >= 3.4.7",  # Storing bot passwords + keys in the config file
     ],
     "sql": [
-        "oursql3 >= 0.9.4",  # Interfacing with MediaWiki databases
+        "pymysql >= 1.1.0",  # Interfacing with MediaWiki databases
     ],
     "copyvios": [
         "beautifulsoup4 >= 4.9.3",  # Parsing/scraping HTML
-        "cchardet >= 2.1.7",  # Encoding detection for BeautifulSoup
+        "charset_normalizer >= 3.3.2",  # Encoding detection for BeautifulSoup
         "lxml >= 4.6.3",  # Faster parser for BeautifulSoup
         "nltk >= 3.6.1",  # Parsing sentences to split article content
         "pdfminer >= 20191125",  # Extracting text from PDF files
@@ -58,21 +58,21 @@ with open("README.rst") as fp:
     long_docs = fp.read()
 
 setup(
-    name = "earwigbot",
-    packages = find_packages(exclude=("tests",)),
-    entry_points = {"console_scripts": ["earwigbot = earwigbot.util:main"]},
-    install_requires = dependencies,
-    test_suite = "tests",
-    version = __version__,
-    author = "Ben Kurtovic",
-    author_email = "ben.kurtovic@gmail.com",
-    url = "https://github.com/earwig/earwigbot",
-    description = "EarwigBot is a Python robot that edits Wikipedia and interacts with people over IRC.",
-    long_description = long_docs,
-    download_url = "https://github.com/earwig/earwigbot/tarball/v{0}".format(__version__),
-    keywords = "earwig earwigbot irc wikipedia wiki mediawiki",
-    license = "MIT License",
-    classifiers = [
+    name="earwigbot",
+    packages=find_packages(exclude=("tests",)),
+    entry_points={"console_scripts": ["earwigbot = earwigbot.util:main"]},
+    install_requires=dependencies,
+    test_suite="tests",
+    version=__version__,
+    author="Ben Kurtovic",
+    author_email="ben.kurtovic@gmail.com",
+    url="https://github.com/earwig/earwigbot",
+    description="EarwigBot is a Python robot that edits Wikipedia and interacts with people over IRC.",
+    long_description=long_docs,
+    download_url="https://github.com/earwig/earwigbot/tarball/v{0}".format(__version__),
+    keywords="earwig earwigbot irc wikipedia wiki mediawiki",
+    license="MIT License",
+    classifiers=[
         "Development Status :: 3 - Alpha",
         "Environment :: Console",
         "Intended Audience :: Developers",
@@ -81,6 +81,6 @@ setup(
         "Operating System :: OS Independent",
         "Programming Language :: Python :: 3",
         "Topic :: Communications :: Chat :: Internet Relay Chat",
-        "Topic :: Internet :: WWW/HTTP"
+        "Topic :: Internet :: WWW/HTTP",
     ],
 )