From 10bc4b3fd48dadbe7989eb46d9d84662d0bf720e Mon Sep 17 00:00:00 2001 From: Ben Kurtovic Date: Sun, 7 Apr 2024 19:55:25 -0400 Subject: [PATCH] Python 3.11+ cleanup and bugfixes --- CHANGELOG | 4 +- earwigbot/irc/watcher.py | 35 ++++--- earwigbot/lazy.py | 15 +-- earwigbot/managers.py | 42 ++++---- earwigbot/tasks/__init__.py | 2 +- earwigbot/tasks/wikiproject_tagger.py | 88 ++++++++++------ earwigbot/wiki/page.py | 19 ++-- earwigbot/wiki/site.py | 190 +++++++++++++++++++++++----------- earwigbot/wiki/sitesdb.py | 86 ++++++++++----- setup.py | 38 +++---- 10 files changed, 335 insertions(+), 184 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index d546b69..ea68d05 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,12 +1,14 @@ v0.4 (unreleased): -- Migrated to Python 3. +- Migrated to Python 3 (3.11+). +- Migrated from oursql to pymysql. - Copyvios: Configurable proxy support for specific domains. - Copyvios: Parser-directed URL redirection. - Copyvios: General parsing improvements. - Copyvios: URL exclusion improvements. - Copyvios: Removed long-deprecated Yahoo! BOSS search engine. - Wiki: Fixed not sending Content-Type header in POST requests. +- IRC: Moved default server from Freenode to Libera. - IRC: Remember joined channels across restarts. - IRC: Added !listchans. - IRC > !stalk: Added modifiers to change message format or filter messages. diff --git a/earwigbot/irc/watcher.py b/earwigbot/irc/watcher.py index b049fef..db85ae4 100644 --- a/earwigbot/irc/watcher.py +++ b/earwigbot/irc/watcher.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2009-2015 Ben Kurtovic +# Copyright (C) 2009-2024 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -20,12 +20,14 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-import imp +import importlib.machinery +import importlib.util from earwigbot.irc import IRCConnection, RC __all__ = ["Watcher"] + class Watcher(IRCConnection): """ **EarwigBot: IRC Watcher Component** @@ -40,16 +42,23 @@ class Watcher(IRCConnection): def __init__(self, bot): self.bot = bot cf = bot.config.irc["watcher"] - super().__init__(cf["host"], cf["port"], cf["nick"], cf["ident"], - cf["realname"], bot.logger.getChild("watcher")) + super().__init__( + cf["host"], + cf["port"], + cf["nick"], + cf["ident"], + cf["realname"], + bot.logger.getChild("watcher"), + ) self._prepare_process_hook() self._connect() def __repr__(self): """Return the canonical string representation of the Watcher.""" res = "Watcher(host={0!r}, port={1!r}, nick={2!r}, ident={3!r}, realname={4!r}, bot={5!r})" - return res.format(self.host, self.port, self.nick, self.ident, - self.realname, self.bot) + return res.format( + self.host, self.port, self.nick, self.ident, self.realname, self.bot + ) def __str__(self): """Return a nice string representation of the Watcher.""" @@ -88,17 +97,11 @@ class Watcher(IRCConnection): self._process_hook = lambda bot, rc: () path = self.bot.config.root_dir - try: - f, path, desc = imp.find_module("rules", [path]) - except ImportError: + spec = importlib.machinery.PathFinder.find_spec("rules", [path]) + if spec is None or spec.loader is None: return - try: - module = imp.load_module("rules", f, path, desc) - except Exception: - return - finally: - f.close() - + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) self._process_hook_module = module try: self._process_hook = module.process diff --git a/earwigbot/lazy.py b/earwigbot/lazy.py index b6c07b3..e0795bd 100644 --- a/earwigbot/lazy.py +++ b/earwigbot/lazy.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2009-2015 Ben Kurtovic +# Copyright (C) 2009-2024 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this 
software and associated documentation files (the "Software"), to deal @@ -27,7 +27,6 @@ manner, so that they can be referred to by name but are not actually loaded until they are used (i.e. their attributes are read or modified). """ -from imp import acquire_lock, release_lock import importlib import sys from threading import RLock @@ -36,12 +35,16 @@ from types import ModuleType __all__ = ["LazyImporter"] _real_get = ModuleType.__getattribute__ +_lazy_init_lock = RLock() + def _create_failing_get(exc): def _fail(self, attr): raise exc + return _fail + def _mock_get(self, attr): with _real_get(self, "_lock"): if _real_get(self, "_unloaded"): @@ -59,14 +62,13 @@ def _mock_get(self, attr): class _LazyModule(type): def __new__(cls, name): - acquire_lock() - try: + with _lazy_init_lock: if name not in sys.modules: attributes = { "__name__": name, "__getattribute__": _mock_get, "_unloaded": True, - "_lock": RLock() + "_lock": RLock(), } parents = (ModuleType,) klass = type.__new__(cls, "module", parents, attributes) @@ -74,8 +76,6 @@ class _LazyModule(type): if "." in name: # Also ensure the parent exists _LazyModule(name.rsplit(".", 1)[0]) return sys.modules[name] - finally: - release_lock() class LazyImporter: @@ -84,6 +84,7 @@ class LazyImporter: This inserts itself into :py:data:`sys.meta_path`, storing a dictionary of :py:class:`_LazyModule`\ s (which is added to with :py:meth:`new`). """ + def __init__(self): self._modules = {} sys.meta_path.append(self) diff --git a/earwigbot/managers.py b/earwigbot/managers.py index 0debfb8..e834ae1 100644 --- a/earwigbot/managers.py +++ b/earwigbot/managers.py @@ -1,7 +1,7 @@ #! 
/usr/bin/env python # -*- coding: utf-8 -*- # -# Copyright (C) 2009-2015 Ben Kurtovic +# Copyright (C) 2009-2024 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -21,7 +21,8 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -import imp +import importlib.machinery +import importlib.util from os import listdir, path from re import sub from threading import RLock, Thread @@ -32,6 +33,7 @@ from earwigbot.tasks import Task __all__ = ["CommandManager", "TaskManager"] + class _ResourceManager: """ **EarwigBot: Resource Manager** @@ -48,6 +50,7 @@ class _ResourceManager: :py:meth:`load`, retrieving specific resources via :py:meth:`get`, and iterating over all resources via :py:meth:`__iter__`. """ + def __init__(self, bot, name, base): self.bot = bot self.logger = bot.logger.getChild(name) @@ -60,8 +63,9 @@ class _ResourceManager: def __repr__(self): """Return the canonical string representation of the manager.""" res = "{0}(bot={1!r}, name={2!r}, base={3!r})" - return res.format(self.__class__.__name__, self.bot, - self._resource_name, self._resource_base) + return res.format( + self.__class__.__name__, self.bot, self._resource_name, self._resource_base + ) def __str__(self): """Return a nice string representation of the manager.""" @@ -100,22 +104,22 @@ class _ResourceManager: def _load_module(self, name, path): """Load a specific resource from a module, identified by name and path. 
- We'll first try to import it using imp magic, and if that works, make - instances of any classes inside that are subclasses of the base + We'll first try to import it using importlib magic, and if that works, + make instances of any classes inside that are subclasses of the base (:py:attr:`self._resource_base <_resource_base>`), add them to the resources dictionary with :py:meth:`self._load_resource() <_load_resource>`, and finally log the addition. Any problems along the way will either be ignored or logged. """ - f, path, desc = imp.find_module(name, [path]) + spec = importlib.machinery.PathFinder.find_spec(name, [path]) try: - module = imp.load_module(name, f, path, desc) + assert spec is not None, "Spec must not be None" + assert spec.loader is not None, "Loader must not be None" + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) except Exception: - e = "Couldn't load module '{0}' (from {1})" - self.logger.exception(e.format(name, path)) + self.logger.exception(f"Couldn't load module {name!r} (from {path})") return - finally: - f.close() for obj in vars(module).values(): if type(obj) is type: @@ -132,7 +136,7 @@ class _ResourceManager: continue if name.startswith("_") or name.startswith("."): continue - modname = sub("\.pyc?$", "", name) # Remove extension + modname = sub(r"\.pyc?$", "", name) # Remove extension if modname in processed: continue processed.append(modname) @@ -200,6 +204,7 @@ class CommandManager(_ResourceManager): """ Manages (i.e., loads, reloads, and calls) IRC commands. 
""" + def __init__(self, bot): super().__init__(bot, "commands", Command) @@ -234,8 +239,7 @@ class CommandManager(_ResourceManager): for command in self: if hook in command.hooks and self._wrap_check(command, data): - thread = Thread(target=self._wrap_process, - args=(command, data)) + thread = Thread(target=self._wrap_process, args=(command, data)) start_time = strftime("%b %d %H:%M:%S") thread.name = "irc:{0} ({1})".format(command.name, start_time) thread.daemon = True @@ -247,6 +251,7 @@ class TaskManager(_ResourceManager): """ Manages (i.e., loads, reloads, schedules, and runs) wiki bot tasks. """ + def __init__(self, bot): super().__init__(bot, "tasks", Task) @@ -292,11 +297,12 @@ class TaskManager(_ResourceManager): if not now: now = gmtime() # Get list of tasks to run this turn: - tasks = self.bot.config.schedule(now.tm_min, now.tm_hour, now.tm_mday, - now.tm_mon, now.tm_wday) + tasks = self.bot.config.schedule( + now.tm_min, now.tm_hour, now.tm_mday, now.tm_mon, now.tm_wday + ) for task in tasks: - if isinstance(task, list): # They've specified kwargs, + if isinstance(task, list): # They've specified kwargs, self.start(task[0], **task[1]) # so pass those to start else: # Otherwise, just pass task_name self.start(task) diff --git a/earwigbot/tasks/__init__.py b/earwigbot/tasks/__init__.py index 3f10846..3b6c37e 100644 --- a/earwigbot/tasks/__init__.py +++ b/earwigbot/tasks/__init__.py @@ -146,7 +146,7 @@ class Task: try: content = page.get() except exceptions.PageNotFoundError: - return False + return True if content == cfg.get("disabled", "run"): return False diff --git a/earwigbot/tasks/wikiproject_tagger.py b/earwigbot/tasks/wikiproject_tagger.py index 9e67cac..4c0649e 100644 --- a/earwigbot/tasks/wikiproject_tagger.py +++ b/earwigbot/tasks/wikiproject_tagger.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2009-2017 Ben Kurtovic +# Copyright (C) 2009-2024 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining 
a copy # of this software and associated documentation files (the "Software"), to deal @@ -26,6 +26,7 @@ from earwigbot import exceptions from earwigbot.tasks import Task from earwigbot.wiki import constants + class WikiProjectTagger(Task): """A task to tag talk pages with WikiProject banners. @@ -76,28 +77,24 @@ class WikiProjectTagger(Task): edited """ + name = "wikiproject_tagger" # Regexes for template names that should always go above the banner, based # on [[Wikipedia:Talk page layout]]: TOP_TEMPS = [ r"skip ?to ?(toc|talk|toctalk)$", - r"ga ?nominee$", - r"(user ?)?talk ?(header|page|page ?header)$", - r"community ?article ?probation$", r"censor(-nudity)?$", r"blp(o| ?others?)?$", r"controvers(ial2?|y)$", - r"(not ?(a ?)?)?forum$", r"tv(episode|series)talk$", r"recurring ?themes$", r"faq$", r"(round ?in ?)?circ(les|ular)$", - r"ar(ti|it)cle ?(history|milestones)$", r"failed ?ga$", r"old ?prod( ?full)?$", @@ -144,10 +141,18 @@ class WikiProjectTagger(Task): else: only_with = None - job = _Job(banner=banner, names=names, summary=summary, update=update, - append=append, autoassess=autoassess, only_with=only_with, - nocreate=nocreate, tag_categories=tag_categories, - dry_run=dry_run) + job = _Job( + banner=banner, + names=names, + summary=summary, + update=update, + append=append, + autoassess=autoassess, + only_with=only_with, + nocreate=nocreate, + tag_categories=tag_categories, + dry_run=dry_run, + ) try: self.run_job(kwargs, site, job, recursive) @@ -165,7 +170,6 @@ class WikiProjectTagger(Task): with open(kwargs["file"], "r") as fileobj: for line in fileobj: if line.strip(): - line = line.decode("utf8") if line.startswith("[[") and line.endswith("]]"): line = line[2:-2] page = site.get_page(line) @@ -201,8 +205,13 @@ class WikiProjectTagger(Task): return banner, None names = {banner, title} - result = site.api_query(action="query", list="backlinks", bllimit=500, - blfilterredir="redirects", bltitle=title) + result = site.api_query( + action="query", + 
list="backlinks", + bllimit=500, + blfilterredir="redirects", + bltitle=title, + ) for backlink in result["query"]["backlinks"]: names.add(backlink["title"]) if backlink["ns"] == constants.NS_TEMPLATE: @@ -215,8 +224,9 @@ class WikiProjectTagger(Task): def process_category(self, page, job, recursive): """Try to tag all pages in the given category.""" if page.title in job.processed_cats: - self.logger.debug("Skipping category, already processed: [[%s]]", - page.title) + self.logger.debug( + "Skipping category, already processed: [[%s]]", page.title + ) return self.logger.info("Processing category: [[%s]]", page.title) job.processed_cats.add(page.title) @@ -243,8 +253,7 @@ class WikiProjectTagger(Task): page = page.toggle_talk() if page.title in job.processed_pages: - self.logger.debug("Skipping page, already processed: [[%s]]", - page.title) + self.logger.debug("Skipping page, already processed: [[%s]]", page.title) return job.processed_pages.add(page.title) @@ -275,21 +284,22 @@ class WikiProjectTagger(Task): return if job.only_with: - if not any(template.name.matches(job.only_with) - for template in code.ifilter_templates(recursive=True)): + if not any( + template.name.matches(job.only_with) + for template in code.ifilter_templates(recursive=True) + ): log = "Skipping page: [[%s]]; fails only-with condition" self.logger.info(log, page.title) return if is_update: - old_banner = str(banner) - self.update_banner(banner, job, code) - if banner == old_banner: + updated = self.update_banner(banner, job, code) + if not updated: log = "Skipping page: [[%s]]; already tagged and no updates" self.logger.info(log, page.title) return self.logger.info("Updating banner on page: [[%s]]", page.title) - banner = banner.encode("utf8") + banner = str(banner) else: self.logger.info("Tagging page: [[%s]]", page.title) banner = self.make_banner(job, code) @@ -334,9 +344,11 @@ class WikiProjectTagger(Task): def update_banner(self, banner, job, code): """Update an existing *banner* based 
on a *job* and a page's *code*.""" - has = lambda key: (banner.has(key) and - banner.get(key).value.strip() not in ("", "?")) + has = lambda key: ( + banner.has(key) and banner.get(key).value.strip() not in ("", "?") + ) + updated = False if job.autoassess is not False: if not has("class"): assess, reason = self.get_autoassessment(code, job.autoassess) @@ -349,6 +361,8 @@ class WikiProjectTagger(Task): key, value = param.split("=", 1) if not has(key): banner.add(key, value) + updated = True + return updated def get_autoassessment(self, code, only_classes=None): """Get an autoassessment for a page. @@ -356,16 +370,27 @@ class WikiProjectTagger(Task): Return (assessed class as a string or None, assessment reason or None). """ if only_classes is None or only_classes is True: - classnames = ["a", "b", "book", "c", "dab", "fa", "fl", "ga", - "list", "redirect", "start", "stub"] + classnames = [ + "a", + "b", + "book", + "c", + "dab", + "fa", + "fl", + "ga", + "list", + "redirect", + "start", + "stub", + ] else: - classnames = [klass.strip().lower() - for klass in only_classes.split(",")] + classnames = [klass.strip().lower() for klass in only_classes.split(",")] classes = {klass: 0 for klass in classnames} for template in code.ifilter_templates(recursive=True): if template.has("class"): - value = str(template.get("class").value).lower() + value = str(template.get("class").value).strip().lower() if value in classes: classes[value] += 1 @@ -429,6 +454,7 @@ class WikiProjectTagger(Task): self.logger.debug("Inserting banner at beginning") code.insert(0, banner + "\n") + class _Job: """Represents a single wikiproject-tagging task. @@ -436,6 +462,7 @@ class _Job: or not to autoassess and create new pages from scratch, and a counter of the number of pages edited. """ + def __init__(self, **kwargs): self.banner = kwargs["banner"] self.names = kwargs["names"] @@ -456,4 +483,5 @@ class _Job: class _ShutoffEnabled(Exception): """Raised by process_page() if shutoff is enabled. 
Caught by run(), which will then stop the task.""" + pass diff --git a/earwigbot/wiki/page.py b/earwigbot/wiki/page.py index 43d18d3..aa61e44 100644 --- a/earwigbot/wiki/page.py +++ b/earwigbot/wiki/page.py @@ -280,7 +280,7 @@ class Page(CopyvioMixIn): self._assert_existence() def _edit(self, params=None, text=None, summary=None, minor=None, bot=None, - force=None, section=None, captcha_id=None, captcha_word=None): + force=None, section=None, captcha_id=None, captcha_word=None, **kwargs): """Edit the page! If *params* is given, we'll use it as our API query parameters. @@ -297,7 +297,7 @@ class Page(CopyvioMixIn): # Build our API query string: if not params: params = self._build_edit_params(text, summary, minor, bot, force, - section, captcha_id, captcha_word) + section, captcha_id, captcha_word, kwargs) else: # Make sure we have the right token: params["token"] = self.site.get_token() @@ -320,7 +320,7 @@ class Page(CopyvioMixIn): raise exceptions.EditError(result["edit"]) def _build_edit_params(self, text, summary, minor, bot, force, section, - captcha_id, captcha_word): + captcha_id, captcha_word, kwargs): """Given some keyword arguments, build an API edit query string.""" unitxt = text.encode("utf8") if isinstance(text, str) else text hashed = md5(unitxt).hexdigest() # Checksum to ensure text is correct @@ -351,6 +351,11 @@ class Page(CopyvioMixIn): else: params["recreate"] = "true" + for key, val in kwargs.items(): + if val is None: + params.pop(key, None) + else: + params[key] = val return params def _handle_edit_errors(self, error, params, retry=True): @@ -657,7 +662,7 @@ class Page(CopyvioMixIn): """ return mwparserfromhell.parse(self.get()) - def edit(self, text, summary, minor=False, bot=True, force=False): + def edit(self, text, summary, minor=False, bot=True, force=False, **kwargs): """Replace the page's content or creates a new page. *text* is the new page content, with *summary* as the edit summary. 
@@ -670,9 +675,9 @@ class Page(CopyvioMixIn): editing our page. Be careful with this! """ self._edit(text=text, summary=summary, minor=minor, bot=bot, - force=force) + force=force, **kwargs) - def add_section(self, text, title, minor=False, bot=True, force=False): + def add_section(self, text, title, minor=False, bot=True, force=False, **kwargs): """Add a new section to the bottom of the page. The arguments for this are the same as those for :py:meth:`edit`, but @@ -683,7 +688,7 @@ class Page(CopyvioMixIn): new section as content. """ self._edit(text=text, summary=title, minor=minor, bot=bot, force=force, - section="new") + section="new", **kwargs) def check_exclusion(self, username=None, optouts=None): """Check whether or not we are allowed to edit the page. diff --git a/earwigbot/wiki/site.py b/earwigbot/wiki/site.py index 60b6f08..274be89 100644 --- a/earwigbot/wiki/site.py +++ b/earwigbot/wiki/site.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2009-2021 Ben Kurtovic +# Copyright (C) 2009-2024 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -37,10 +37,11 @@ from earwigbot.wiki.category import Category from earwigbot.wiki.page import Page from earwigbot.wiki.user import User -oursql = importer.new("oursql") +pymysql = importer.new("pymysql") __all__ = ["Site"] + class Site: """ **EarwigBot: Wiki Toolset: Site** @@ -80,18 +81,41 @@ class Site: - :py:meth:`get_user`: returns a User object for the given name - :py:meth:`delegate`: controls when the API or SQL is used """ + SERVICE_API = 1 SERVICE_SQL = 2 - SPECIAL_TOKENS = ["createaccount", "deleteglobalaccount", "login", - "patrol", "rollback", "setglobalaccountstatus", - "userrights", "watch"] - - def __init__(self, name=None, project=None, lang=None, base_url=None, - article_path=None, script_path=None, sql=None, - namespaces=None, login=(None, None), oauth=None, - 
cookiejar=None, user_agent=None, use_https=True, - assert_edit=None, maxlag=None, wait_between_queries=1, - logger=None, search_config=None): + SPECIAL_TOKENS = [ + "createaccount", + "deleteglobalaccount", + "login", + "patrol", + "rollback", + "setglobalaccountstatus", + "userrights", + "watch", + ] + + def __init__( + self, + name=None, + project=None, + lang=None, + base_url=None, + article_path=None, + script_path=None, + sql=None, + namespaces=None, + login=(None, None), + oauth=None, + cookiejar=None, + user_agent=None, + use_https=True, + assert_edit=None, + maxlag=None, + wait_between_queries=1, + logger=None, + search_config=None, + ): """Constructor for new Site instances. This probably isn't necessary to call yourself unless you're building a @@ -160,8 +184,11 @@ class Site: self._session.headers["User-Agent"] = user_agent if oauth: self._session.auth = OAuth1( - oauth["consumer_token"], oauth["consumer_secret"], - oauth["access_token"], oauth["access_secret"]) + oauth["consumer_token"], + oauth["consumer_secret"], + oauth["access_token"], + oauth["access_secret"], + ) # Set up our internal logger: if logger: @@ -182,13 +209,24 @@ class Site: def __repr__(self): """Return the canonical string representation of the Site.""" - res = ", ".join(( - "Site(name={_name!r}", "project={_project!r}", "lang={_lang!r}", - "base_url={_base_url!r}", "article_path={_article_path!r}", - "script_path={_script_path!r}", "use_https={_use_https!r}", - "assert_edit={_assert_edit!r}", "maxlag={_maxlag!r}", - "sql={_sql_data!r}", "login={0}", "oauth={1}", "user_agent={3!r}", - "cookiejar={2})")) + res = ", ".join( + ( + "Site(name={_name!r}", + "project={_project!r}", + "lang={_lang!r}", + "base_url={_base_url!r}", + "article_path={_article_path!r}", + "script_path={_script_path!r}", + "use_https={_use_https!r}", + "assert_edit={_assert_edit!r}", + "maxlag={_maxlag!r}", + "sql={_sql_data!r}", + "login={0}", + "oauth={1}", + "user_agent={3!r}", + "cookiejar={2})", + ) + ) 
name, password = self._login_info login = "({0}, {1})".format(repr(name), "hidden" if password else None) oauth = "hidden" if self._oauth else None @@ -211,8 +249,15 @@ class Site: return value return str(value, encoding) - def _api_query(self, params, tries=0, wait=5, ignore_maxlag=False, - no_assert=False, ae_retry=True): + def _api_query( + self, + params, + tries=0, + wait=5, + ignore_maxlag=False, + no_assert=False, + ae_retry=True, + ): """Do an API query with *params* as a dict of parameters. See the documentation for :py:meth:`api_query` for full implementation @@ -348,8 +393,14 @@ class Site: """ # All attributes to be loaded, except _namespaces, which is a special # case because it requires additional params in the API query: - attrs = [self._name, self._project, self._lang, self._base_url, - self._article_path, self._script_path] + attrs = [ + self._name, + self._project, + self._lang, + self._base_url, + self._article_path, + self._script_path, + ] params = {"action": "query", "meta": "siteinfo", "siprop": "general"} @@ -359,7 +410,7 @@ class Site: result = self._api_query(params, no_assert=True) self._load_namespaces(result) elif all(attrs): # Everything is already specified and we're not told - return # to force a reload, so do nothing + return # to force a reload, so do nothing else: # We're only loading attributes other than _namespaces with self._api_lock: result = self._api_query(params, no_assert=True) @@ -424,11 +475,11 @@ class Site: (for that, we'd do self._login_info[0]), but rather to get our current username without an unnecessary ?action=query&meta=userinfo API query. 
""" - name = ''.join((self._name, "Token")) + name = "".join((self._name, "Token")) cookie = self._get_cookie(name, self.domain) if cookie: - name = ''.join((self._name, "UserName")) + name = "".join((self._name, "UserName")) user_name = self._get_cookie(name, self.domain) if user_name: return unquote_plus(user_name.value) @@ -528,8 +579,12 @@ class Site: except KeyError: raise exceptions.LoginError("Couldn't get login token") - params = {"action": "login", "lgname": name, "lgpassword": password, - "lgtoken": token} + params = { + "action": "login", + "lgname": name, + "lgpassword": password, + "lgtoken": token, + } with self._api_lock: result = self._api_query(params, no_assert=True) @@ -564,18 +619,22 @@ class Site: def _sql_connect(self, **kwargs): """Attempt to establish a connection with this site's SQL database. - oursql.connect() will be called with self._sql_data as its kwargs. + pymysql.connect() will be called with self._sql_data as its kwargs. Any kwargs given to this function will be passed to connect() and will have precedence over the config file. - Will raise SQLError() if the module "oursql" is not available. oursql - may raise its own exceptions (e.g. oursql.InterfaceError) if it cannot + Will raise SQLError() if the module "pymysql" is not available. pymysql + may raise its own exceptions (e.g. pymysql.InterfaceError) if it cannot establish a connection. 
""" args = self._sql_data for key, value in kwargs.items(): args[key] = value - if "read_default_file" not in args and "user" not in args and "passwd" not in args: + if ( + "read_default_file" not in args + and "user" not in args + and "passwd" not in args + ): args["read_default_file"] = expanduser("~/.my.cnf") elif "read_default_file" in args: args["read_default_file"] = expanduser(args["read_default_file"]) @@ -585,9 +644,9 @@ class Site: args["autoreconnect"] = True try: - self._sql_conn = oursql.connect(**args) + self._sql_conn = pymysql.connect(**args) except ImportError: - e = "SQL querying requires the 'oursql' package: https://pythonhosted.org/oursql/" + e = "SQL querying requires the 'pymysql' package: https://pymysql.readthedocs.io/" raise exceptions.SQLError(e) def _get_service_order(self): @@ -608,8 +667,11 @@ class Site: if now - self._sql_info_cache["lastcheck"] > 120: self._sql_info_cache["lastcheck"] = now try: - self._sql_info_cache["replag"] = sqllag = self.get_replag() - except (exceptions.SQLError, oursql.Error): + try: + self._sql_info_cache["replag"] = sqllag = self.get_replag() + except pymysql.Error as exc: + raise exceptions.SQLError(str(exc)) + except (exceptions.SQLError, ImportError): self._sql_info_cache["usable"] = False return [self.SERVICE_API] self._sql_info_cache["usable"] = True @@ -705,24 +767,31 @@ class Site: with self._api_lock: return self._api_query(kwargs) - def sql_query(self, query, params=(), plain_query=False, dict_cursor=False, - cursor_class=None, show_table=False, buffsize=1024): + def sql_query( + self, + query, + params=(), + plain_query=False, + dict_cursor=False, + cursor_class=None, + buffsize=1024, + ): """Do an SQL query and yield its results. If *plain_query* is ``True``, we will force an unparameterized query. Specifying both *params* and *plain_query* will cause an error. 
If - *dict_cursor* is ``True``, we will use :py:class:`oursql.DictCursor` as - our cursor, otherwise the default :py:class:`oursql.Cursor`. If - *cursor_class* is given, it will override this option. If *show_table* - is True, the name of the table will be prepended to the name of the - column. This will mainly affect an :py:class:`~oursql.DictCursor`. + *dict_cursor* is ``True``, we will use + :py:class:`pymysql.cursors.DictCursor` as our cursor, otherwise the + default :py:class:`pymysql.cursors.Cursor`. If *cursor_class* is given, + it will override this option. *buffsize* is the size of each memory-buffered group of results, to reduce the number of conversations with the database; it is passed to - :py:meth:`cursor.fetchmany() `. If set to - ``0```, all results will be buffered in memory at once (this uses - :py:meth:`fetchall() `). If set to ``1``, it is - equivalent to using :py:meth:`fetchone() `. + :py:meth:`cursor.fetchmany() <pymysql.cursors.Cursor.fetchmany>`. If + set to ``0``, all results will be buffered in memory at once (this + uses :py:meth:`fetchall() <pymysql.cursors.Cursor.fetchall>`). If set + to ``1``, it is equivalent to using + :py:meth:`fetchone() <pymysql.cursors.Cursor.fetchone>`. Example usage:: @@ -736,25 +805,25 @@ class Site: {'user_id': 7418060L, 'user_registration': '20080703215134'} This may raise :py:exc:`~earwigbot.exceptions.SQLError` or one of - oursql's exceptions (:py:exc:`oursql.ProgrammingError`, - :py:exc:`oursql.InterfaceError`, ...) if there were problems with the + pymysql's exceptions (:py:exc:`pymysql.ProgrammingError`, + :py:exc:`pymysql.InterfaceError`, ...) if there were problems with the query. See :py:meth:`_sql_connect` for information on how a connection is - acquired. Also relevant is `oursql's documentation - `_ for details on that package. + acquired. Also relevant is `pymysql's documentation + <https://pymysql.readthedocs.io/>`_ for details on that package. 
""" if not cursor_class: if dict_cursor: - cursor_class = oursql.DictCursor + cursor_class = pymysql.cursors.DictCursor else: - cursor_class = oursql.Cursor + cursor_class = pymysql.cursors.Cursor klass = cursor_class with self._sql_lock: if not self._sql_conn: self._sql_connect() - with self._sql_conn.cursor(klass, show_table=show_table) as cur: + with self._sql_conn.cursor(klass) as cur: cur.execute(query, params, plain_query) if buffsize: while True: @@ -798,8 +867,8 @@ class Site: time from the timestamp of the latest recent changes event. This may raise :py:exc:`~earwigbot.exceptions.SQLError` or one of - oursql's exceptions (:py:exc:`oursql.ProgrammingError`, - :py:exc:`oursql.InterfaceError`, ...) if there were problems. + pymysql's exceptions (:py:exc:`pymysql.ProgrammingError`, + :py:exc:`pymysql.InterfaceError`, ...) if there were problems. """ query = """SELECT UNIX_TIMESTAMP() - UNIX_TIMESTAMP(rc_timestamp) FROM recentchanges ORDER BY rc_timestamp DESC LIMIT 1""" @@ -886,8 +955,7 @@ class Site: prefix = title.split(":", 1)[0] if prefix != title: # Avoid a page that is simply "Category" if prefix in prefixes: - return Category(self, title, follow_redirects, pageid, - self._logger) + return Category(self, title, follow_redirects, pageid, self._logger) return Page(self, title, follow_redirects, pageid, self._logger) def get_category(self, catname, follow_redirects=False, pageid=None): @@ -899,7 +967,7 @@ class Site: """ catname = self._unicodeify(catname) prefix = self.namespace_id_to_name(constants.NS_CATEGORY) - pagename = ':'.join((prefix, catname)) + pagename = ":".join((prefix, catname)) return Category(self, pagename, follow_redirects, pageid, self._logger) def get_user(self, username=None): diff --git a/earwigbot/wiki/sitesdb.py b/earwigbot/wiki/sitesdb.py index a01ef64..aff98b5 100644 --- a/earwigbot/wiki/sitesdb.py +++ b/earwigbot/wiki/sitesdb.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2009-2021 Ben Kurtovic +# Copyright (C) 
2009-2024 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -35,6 +35,7 @@ from earwigbot.wiki.site import Site __all__ = ["SitesDB"] + class SitesDB: """ **EarwigBot: Wiki Toolset: Sites Database Manager** @@ -106,7 +107,7 @@ class SitesDB: # Create the file and restrict reading/writing only to the # owner, so others can't peak at our cookies: open(self._cookie_file, "w").close() - chmod(self._cookie_file, stat.S_IRUSR|stat.S_IWUSR) + chmod(self._cookie_file, stat.S_IRUSR | stat.S_IWUSR) else: raise @@ -172,8 +173,16 @@ class SitesDB: except KeyError: namespaces[ns_id] = [ns_name] - return (name, project, lang, base_url, article_path, script_path, sql, - namespaces) + return ( + name, + project, + lang, + base_url, + article_path, + script_path, + sql, + namespaces, + ) def _make_site_object(self, name): """Return a Site object associated with the site *name* in our sitesdb. @@ -182,8 +191,9 @@ class SitesDB: raised if the site is not in our sitesdb. 
""" cookiejar = self._get_cookiejar() - (name, project, lang, base_url, article_path, script_path, sql, - namespaces) = self._load_site_from_sitesdb(name) + (name, project, lang, base_url, article_path, script_path, sql, namespaces) = ( + self._load_site_from_sitesdb(name) + ) config = self.config login = (config.wiki.get("username"), config.wiki.get("password")) @@ -211,13 +221,26 @@ class SitesDB: if isinstance(value, str) and "$1" in value: sql[key] = value.replace("$1", name) - return Site(name=name, project=project, lang=lang, base_url=base_url, - article_path=article_path, script_path=script_path, - sql=sql, namespaces=namespaces, login=login, oauth=oauth, - cookiejar=cookiejar, user_agent=user_agent, - use_https=use_https, assert_edit=assert_edit, - maxlag=maxlag, wait_between_queries=wait_between_queries, - logger=logger, search_config=search_config) + return Site( + name=name, + project=project, + lang=lang, + base_url=base_url, + article_path=article_path, + script_path=script_path, + sql=sql, + namespaces=namespaces, + login=login, + oauth=oauth, + cookiejar=cookiejar, + user_agent=user_agent, + use_https=use_https, + assert_edit=assert_edit, + maxlag=maxlag, + wait_between_queries=wait_between_queries, + logger=logger, + search_config=search_config, + ) def _get_site_name_from_sitesdb(self, project, lang): """Return the name of the first site with the given project and lang. @@ -255,8 +278,14 @@ class SitesDB: database. If the sitesdb doesn't exist, we'll create it first. 
""" name = site.name - sites_data = (name, site.project, site.lang, site._base_url, - site._article_path, site._script_path) + sites_data = ( + name, + site.project, + site.lang, + site._base_url, + site._article_path, + site._script_path, + ) sql_data = [(name, key, val) for key, val in site._sql_data.items()] ns_data = [] for ns_id, ns_names in site._namespaces.items(): @@ -353,8 +382,9 @@ class SitesDB: e = "Site '{0}:{1}' not found in the sitesdb.".format(project, lang) raise SiteNotFoundError(e) - def add_site(self, project=None, lang=None, base_url=None, - script_path="/w", sql=None): + def add_site( + self, project=None, lang=None, base_url=None, script_path="/w", sql=None + ): """Add a site to the sitesdb so it can be retrieved with get_site(). If only a project and a lang are given, we'll guess the *base_url* as @@ -368,8 +398,8 @@ class SitesDB: your wiki is different, provide the script_path as an argument. SQL connection settings are guessed automatically using config's template value. If this is wrong or not specified, provide a dict of kwargs as - *sql* and Site will pass it to :py:func:`oursql.connect(**sql) - `, allowing you to make queries with + *sql* and Site will pass it to :py:func:`pymysql.connect(**sql) + `, allowing you to make queries with :py:meth:`site.sql_query `. 
Returns ``True`` if the site was added successfully or ``False`` if the @@ -399,11 +429,19 @@ class SitesDB: user_agent = user_agent.replace("$2", python_version()) # Create a Site object to log in and load the other attributes: - site = Site(base_url=base_url, script_path=script_path, sql=sql, - login=login, oauth=oauth, cookiejar=cookiejar, - user_agent=user_agent, use_https=use_https, - assert_edit=assert_edit, maxlag=maxlag, - wait_between_queries=wait_between_queries) + site = Site( + base_url=base_url, + script_path=script_path, + sql=sql, + login=login, + oauth=oauth, + cookiejar=cookiejar, + user_agent=user_agent, + use_https=use_https, + assert_edit=assert_edit, + maxlag=maxlag, + wait_between_queries=wait_between_queries, + ) self._logger.info("Added site '{0}'".format(site.name)) self._add_site_to_sitesdb(site) diff --git a/setup.py b/setup.py index a928353..9feb5c6 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ #! /usr/bin/env python # -*- coding: utf-8 -*- # -# Copyright (C) 2009-2021 Ben Kurtovic +# Copyright (C) 2009-2024 Ben Kurtovic # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -37,11 +37,11 @@ extra_deps = { "cryptography >= 3.4.7", # Storing bot passwords + keys in the config file ], "sql": [ - "oursql3 >= 0.9.4", # Interfacing with MediaWiki databases + "pymysql >= 1.1.0", # Interfacing with MediaWiki databases ], "copyvios": [ "beautifulsoup4 >= 4.9.3", # Parsing/scraping HTML - "cchardet >= 2.1.7", # Encoding detection for BeautifulSoup + "charset_normalizer >= 3.3.2", # Encoding detection for BeautifulSoup "lxml >= 4.6.3", # Faster parser for BeautifulSoup "nltk >= 3.6.1", # Parsing sentences to split article content "pdfminer >= 20191125", # Extracting text from PDF files @@ -58,21 +58,21 @@ with open("README.rst") as fp: long_docs = fp.read() setup( - name = "earwigbot", - packages = find_packages(exclude=("tests",)), 
- entry_points = {"console_scripts": ["earwigbot = earwigbot.util:main"]}, - install_requires = dependencies, - test_suite = "tests", - version = __version__, - author = "Ben Kurtovic", - author_email = "ben.kurtovic@gmail.com", - url = "https://github.com/earwig/earwigbot", - description = "EarwigBot is a Python robot that edits Wikipedia and interacts with people over IRC.", - long_description = long_docs, - download_url = "https://github.com/earwig/earwigbot/tarball/v{0}".format(__version__), - keywords = "earwig earwigbot irc wikipedia wiki mediawiki", - license = "MIT License", - classifiers = [ + name="earwigbot", + packages=find_packages(exclude=("tests",)), + entry_points={"console_scripts": ["earwigbot = earwigbot.util:main"]}, + install_requires=dependencies, + test_suite="tests", + version=__version__, + author="Ben Kurtovic", + author_email="ben.kurtovic@gmail.com", + url="https://github.com/earwig/earwigbot", + description="EarwigBot is a Python robot that edits Wikipedia and interacts with people over IRC.", + long_description=long_docs, + download_url="https://github.com/earwig/earwigbot/tarball/v{0}".format(__version__), + keywords="earwig earwigbot irc wikipedia wiki mediawiki", + license="MIT License", + classifiers=[ "Development Status :: 3 - Alpha", "Environment :: Console", "Intended Audience :: Developers", @@ -81,6 +81,6 @@ setup( "Operating System :: OS Independent", "Programming Language :: Python :: 3", "Topic :: Communications :: Chat :: Internet Relay Chat", - "Topic :: Internet :: WWW/HTTP" + "Topic :: Internet :: WWW/HTTP", ], )