@@ -1,12 +1,14 @@ | |||||
v0.4 (unreleased): | v0.4 (unreleased): | ||||
- Migrated to Python 3. | |||||
- Migrated to Python 3 (3.11+). | |||||
- Migrated from oursql to pymysql. | |||||
- Copyvios: Configurable proxy support for specific domains. | - Copyvios: Configurable proxy support for specific domains. | ||||
- Copyvios: Parser-directed URL redirection. | - Copyvios: Parser-directed URL redirection. | ||||
- Copyvios: General parsing improvements. | - Copyvios: General parsing improvements. | ||||
- Copyvios: URL exclusion improvements. | - Copyvios: URL exclusion improvements. | ||||
- Copyvios: Removed long-deprecated Yahoo! BOSS search engine. | - Copyvios: Removed long-deprecated Yahoo! BOSS search engine. | ||||
- Wiki: Fixed not sending Content-Type header in POST requests. | - Wiki: Fixed not sending Content-Type header in POST requests. | ||||
- IRC: Moved default server from Freenode to Libera. | |||||
- IRC: Remember joined channels across restarts. | - IRC: Remember joined channels across restarts. | ||||
- IRC: Added !listchans. | - IRC: Added !listchans. | ||||
- IRC > !stalk: Added modifiers to change message format or filter messages. | - IRC > !stalk: Added modifiers to change message format or filter messages. | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2009-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -20,12 +20,14 @@ | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||||
# SOFTWARE. | # SOFTWARE. | ||||
import imp | |||||
import importlib.machinery | |||||
import importlib.util | |||||
from earwigbot.irc import IRCConnection, RC | from earwigbot.irc import IRCConnection, RC | ||||
__all__ = ["Watcher"] | __all__ = ["Watcher"] | ||||
class Watcher(IRCConnection): | class Watcher(IRCConnection): | ||||
""" | """ | ||||
**EarwigBot: IRC Watcher Component** | **EarwigBot: IRC Watcher Component** | ||||
@@ -40,16 +42,23 @@ class Watcher(IRCConnection): | |||||
def __init__(self, bot):
    """Create the watcher for *bot* and connect it.

    The connection parameters (host, port, nick, ident, realname) are read
    from the ``"watcher"`` entry of ``bot.config.irc``; the logger is a
    child of the bot's logger named ``"watcher"``.
    """
    self.bot = bot
    cf = bot.config.irc["watcher"]
    super().__init__(
        cf["host"],
        cf["port"],
        cf["nick"],
        cf["ident"],
        cf["realname"],
        bot.logger.getChild("watcher"),
    )
    # The process hook is prepared before connecting.
    self._prepare_process_hook()
    self._connect()
def __repr__(self):
    """Return the canonical string representation of the Watcher."""
    # Every field uses !r so the output mirrors the constructor-style form
    # "Watcher(host=..., port=..., ...)".
    return (
        f"Watcher(host={self.host!r}, port={self.port!r}, "
        f"nick={self.nick!r}, ident={self.ident!r}, "
        f"realname={self.realname!r}, bot={self.bot!r})"
    )
def __str__(self): | def __str__(self): | ||||
"""Return a nice string representation of the Watcher.""" | """Return a nice string representation of the Watcher.""" | ||||
@@ -88,17 +97,11 @@ class Watcher(IRCConnection): | |||||
self._process_hook = lambda bot, rc: () | self._process_hook = lambda bot, rc: () | ||||
path = self.bot.config.root_dir | path = self.bot.config.root_dir | ||||
try: | |||||
f, path, desc = imp.find_module("rules", [path]) | |||||
except ImportError: | |||||
spec = importlib.machinery.PathFinder.find_spec("rules", [path]) | |||||
if spec is None or spec.loader is None: | |||||
return | return | ||||
try: | |||||
module = imp.load_module("rules", f, path, desc) | |||||
except Exception: | |||||
return | |||||
finally: | |||||
f.close() | |||||
module = importlib.util.module_from_spec(spec) | |||||
spec.loader.exec_module(module) | |||||
self._process_hook_module = module | self._process_hook_module = module | ||||
try: | try: | ||||
self._process_hook = module.process | self._process_hook = module.process | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2009-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -27,7 +27,6 @@ manner, so that they can be referred to by name but are not actually loaded | |||||
until they are used (i.e. their attributes are read or modified). | until they are used (i.e. their attributes are read or modified). | ||||
""" | """ | ||||
from imp import acquire_lock, release_lock | |||||
import importlib | import importlib | ||||
import sys | import sys | ||||
from threading import RLock | from threading import RLock | ||||
@@ -36,12 +35,16 @@ from types import ModuleType | |||||
__all__ = ["LazyImporter"] | __all__ = ["LazyImporter"] | ||||
_real_get = ModuleType.__getattribute__ | _real_get = ModuleType.__getattribute__ | ||||
_lazy_init_lock = RLock() | |||||
def _create_failing_get(exc): | def _create_failing_get(exc): | ||||
def _fail(self, attr): | def _fail(self, attr): | ||||
raise exc | raise exc | ||||
return _fail | return _fail | ||||
def _mock_get(self, attr): | def _mock_get(self, attr): | ||||
with _real_get(self, "_lock"): | with _real_get(self, "_lock"): | ||||
if _real_get(self, "_unloaded"): | if _real_get(self, "_unloaded"): | ||||
@@ -59,14 +62,13 @@ def _mock_get(self, attr): | |||||
class _LazyModule(type): | class _LazyModule(type): | ||||
def __new__(cls, name): | def __new__(cls, name): | ||||
acquire_lock() | |||||
try: | |||||
with _lazy_init_lock: | |||||
if name not in sys.modules: | if name not in sys.modules: | ||||
attributes = { | attributes = { | ||||
"__name__": name, | "__name__": name, | ||||
"__getattribute__": _mock_get, | "__getattribute__": _mock_get, | ||||
"_unloaded": True, | "_unloaded": True, | ||||
"_lock": RLock() | |||||
"_lock": RLock(), | |||||
} | } | ||||
parents = (ModuleType,) | parents = (ModuleType,) | ||||
klass = type.__new__(cls, "module", parents, attributes) | klass = type.__new__(cls, "module", parents, attributes) | ||||
@@ -74,8 +76,6 @@ class _LazyModule(type): | |||||
if "." in name: # Also ensure the parent exists | if "." in name: # Also ensure the parent exists | ||||
_LazyModule(name.rsplit(".", 1)[0]) | _LazyModule(name.rsplit(".", 1)[0]) | ||||
return sys.modules[name] | return sys.modules[name] | ||||
finally: | |||||
release_lock() | |||||
class LazyImporter: | class LazyImporter: | ||||
@@ -84,6 +84,7 @@ class LazyImporter: | |||||
This inserts itself into :py:data:`sys.meta_path`, storing a dictionary of | This inserts itself into :py:data:`sys.meta_path`, storing a dictionary of | ||||
:py:class:`_LazyModule`\ s (which is added to with :py:meth:`new`). | :py:class:`_LazyModule`\ s (which is added to with :py:meth:`new`). | ||||
""" | """ | ||||
def __init__(self): | def __init__(self): | ||||
self._modules = {} | self._modules = {} | ||||
sys.meta_path.append(self) | sys.meta_path.append(self) | ||||
@@ -1,7 +1,7 @@ | |||||
#! /usr/bin/env python | #! /usr/bin/env python | ||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2009-2015 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -21,7 +21,8 @@ | |||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||||
# SOFTWARE. | # SOFTWARE. | ||||
import imp | |||||
import importlib.machinery | |||||
import importlib.util | |||||
from os import listdir, path | from os import listdir, path | ||||
from re import sub | from re import sub | ||||
from threading import RLock, Thread | from threading import RLock, Thread | ||||
@@ -32,6 +33,7 @@ from earwigbot.tasks import Task | |||||
__all__ = ["CommandManager", "TaskManager"] | __all__ = ["CommandManager", "TaskManager"] | ||||
class _ResourceManager: | class _ResourceManager: | ||||
""" | """ | ||||
**EarwigBot: Resource Manager** | **EarwigBot: Resource Manager** | ||||
@@ -48,6 +50,7 @@ class _ResourceManager: | |||||
:py:meth:`load`, retrieving specific resources via :py:meth:`get`, and | :py:meth:`load`, retrieving specific resources via :py:meth:`get`, and | ||||
iterating over all resources via :py:meth:`__iter__`. | iterating over all resources via :py:meth:`__iter__`. | ||||
""" | """ | ||||
def __init__(self, bot, name, base): | def __init__(self, bot, name, base): | ||||
self.bot = bot | self.bot = bot | ||||
self.logger = bot.logger.getChild(name) | self.logger = bot.logger.getChild(name) | ||||
@@ -60,8 +63,9 @@ class _ResourceManager: | |||||
def __repr__(self): | def __repr__(self): | ||||
"""Return the canonical string representation of the manager.""" | """Return the canonical string representation of the manager.""" | ||||
res = "{0}(bot={1!r}, name={2!r}, base={3!r})" | res = "{0}(bot={1!r}, name={2!r}, base={3!r})" | ||||
return res.format(self.__class__.__name__, self.bot, | |||||
self._resource_name, self._resource_base) | |||||
return res.format( | |||||
self.__class__.__name__, self.bot, self._resource_name, self._resource_base | |||||
) | |||||
def __str__(self): | def __str__(self): | ||||
"""Return a nice string representation of the manager.""" | """Return a nice string representation of the manager.""" | ||||
@@ -100,22 +104,22 @@ class _ResourceManager: | |||||
def _load_module(self, name, path): | def _load_module(self, name, path): | ||||
"""Load a specific resource from a module, identified by name and path. | """Load a specific resource from a module, identified by name and path. | ||||
We'll first try to import it using imp magic, and if that works, make | |||||
instances of any classes inside that are subclasses of the base | |||||
We'll first try to import it using importlib magic, and if that works, | |||||
make instances of any classes inside that are subclasses of the base | |||||
(:py:attr:`self._resource_base <_resource_base>`), add them to the | (:py:attr:`self._resource_base <_resource_base>`), add them to the | ||||
resources dictionary with :py:meth:`self._load_resource() | resources dictionary with :py:meth:`self._load_resource() | ||||
<_load_resource>`, and finally log the addition. Any problems along | <_load_resource>`, and finally log the addition. Any problems along | ||||
the way will either be ignored or logged. | the way will either be ignored or logged. | ||||
""" | """ | ||||
f, path, desc = imp.find_module(name, [path]) | |||||
spec = importlib.machinery.PathFinder.find_spec(name, [path]) | |||||
try: | try: | ||||
module = imp.load_module(name, f, path, desc) | |||||
assert spec is not None, "Spec must not be None" | |||||
assert spec.loader is not None, "Loader must not be None" | |||||
module = importlib.util.module_from_spec(spec) | |||||
spec.loader.exec_module(module) | |||||
except Exception: | except Exception: | ||||
e = "Couldn't load module '{0}' (from {1})" | |||||
self.logger.exception(e.format(name, path)) | |||||
self.logger.exception(f"Couldn't load module {name!r} (from {path})") | |||||
return | return | ||||
finally: | |||||
f.close() | |||||
for obj in vars(module).values(): | for obj in vars(module).values(): | ||||
if type(obj) is type: | if type(obj) is type: | ||||
@@ -132,7 +136,7 @@ class _ResourceManager: | |||||
continue | continue | ||||
if name.startswith("_") or name.startswith("."): | if name.startswith("_") or name.startswith("."): | ||||
continue | continue | ||||
modname = sub("\.pyc?$", "", name) # Remove extension | |||||
modname = sub(r"\.pyc?$", "", name) # Remove extension | |||||
if modname in processed: | if modname in processed: | ||||
continue | continue | ||||
processed.append(modname) | processed.append(modname) | ||||
@@ -200,6 +204,7 @@ class CommandManager(_ResourceManager): | |||||
""" | """ | ||||
Manages (i.e., loads, reloads, and calls) IRC commands. | Manages (i.e., loads, reloads, and calls) IRC commands. | ||||
""" | """ | ||||
def __init__(self, bot): | def __init__(self, bot): | ||||
super().__init__(bot, "commands", Command) | super().__init__(bot, "commands", Command) | ||||
@@ -234,8 +239,7 @@ class CommandManager(_ResourceManager): | |||||
for command in self: | for command in self: | ||||
if hook in command.hooks and self._wrap_check(command, data): | if hook in command.hooks and self._wrap_check(command, data): | ||||
thread = Thread(target=self._wrap_process, | |||||
args=(command, data)) | |||||
thread = Thread(target=self._wrap_process, args=(command, data)) | |||||
start_time = strftime("%b %d %H:%M:%S") | start_time = strftime("%b %d %H:%M:%S") | ||||
thread.name = "irc:{0} ({1})".format(command.name, start_time) | thread.name = "irc:{0} ({1})".format(command.name, start_time) | ||||
thread.daemon = True | thread.daemon = True | ||||
@@ -247,6 +251,7 @@ class TaskManager(_ResourceManager): | |||||
""" | """ | ||||
Manages (i.e., loads, reloads, schedules, and runs) wiki bot tasks. | Manages (i.e., loads, reloads, schedules, and runs) wiki bot tasks. | ||||
""" | """ | ||||
def __init__(self, bot): | def __init__(self, bot): | ||||
super().__init__(bot, "tasks", Task) | super().__init__(bot, "tasks", Task) | ||||
@@ -292,11 +297,12 @@ class TaskManager(_ResourceManager): | |||||
if not now: | if not now: | ||||
now = gmtime() | now = gmtime() | ||||
# Get list of tasks to run this turn: | # Get list of tasks to run this turn: | ||||
tasks = self.bot.config.schedule(now.tm_min, now.tm_hour, now.tm_mday, | |||||
now.tm_mon, now.tm_wday) | |||||
tasks = self.bot.config.schedule( | |||||
now.tm_min, now.tm_hour, now.tm_mday, now.tm_mon, now.tm_wday | |||||
) | |||||
for task in tasks: | for task in tasks: | ||||
if isinstance(task, list): # They've specified kwargs, | |||||
if isinstance(task, list): # They've specified kwargs, | |||||
self.start(task[0], **task[1]) # so pass those to start | self.start(task[0], **task[1]) # so pass those to start | ||||
else: # Otherwise, just pass task_name | else: # Otherwise, just pass task_name | ||||
self.start(task) | self.start(task) |
@@ -146,7 +146,7 @@ class Task: | |||||
try: | try: | ||||
content = page.get() | content = page.get() | ||||
except exceptions.PageNotFoundError: | except exceptions.PageNotFoundError: | ||||
return False | |||||
return True | |||||
if content == cfg.get("disabled", "run"): | if content == cfg.get("disabled", "run"): | ||||
return False | return False | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2009-2017 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -26,6 +26,7 @@ from earwigbot import exceptions | |||||
from earwigbot.tasks import Task | from earwigbot.tasks import Task | ||||
from earwigbot.wiki import constants | from earwigbot.wiki import constants | ||||
class WikiProjectTagger(Task): | class WikiProjectTagger(Task): | ||||
"""A task to tag talk pages with WikiProject banners. | """A task to tag talk pages with WikiProject banners. | ||||
@@ -76,28 +77,24 @@ class WikiProjectTagger(Task): | |||||
edited | edited | ||||
""" | """ | ||||
name = "wikiproject_tagger" | name = "wikiproject_tagger" | ||||
# Regexes for template names that should always go above the banner, based | # Regexes for template names that should always go above the banner, based | ||||
# on [[Wikipedia:Talk page layout]]: | # on [[Wikipedia:Talk page layout]]: | ||||
TOP_TEMPS = [ | TOP_TEMPS = [ | ||||
r"skip ?to ?(toc|talk|toctalk)$", | r"skip ?to ?(toc|talk|toctalk)$", | ||||
r"ga ?nominee$", | r"ga ?nominee$", | ||||
r"(user ?)?talk ?(header|page|page ?header)$", | r"(user ?)?talk ?(header|page|page ?header)$", | ||||
r"community ?article ?probation$", | r"community ?article ?probation$", | ||||
r"censor(-nudity)?$", | r"censor(-nudity)?$", | ||||
r"blp(o| ?others?)?$", | r"blp(o| ?others?)?$", | ||||
r"controvers(ial2?|y)$", | r"controvers(ial2?|y)$", | ||||
r"(not ?(a ?)?)?forum$", | r"(not ?(a ?)?)?forum$", | ||||
r"tv(episode|series)talk$", | r"tv(episode|series)talk$", | ||||
r"recurring ?themes$", | r"recurring ?themes$", | ||||
r"faq$", | r"faq$", | ||||
r"(round ?in ?)?circ(les|ular)$", | r"(round ?in ?)?circ(les|ular)$", | ||||
r"ar(ti|it)cle ?(history|milestones)$", | r"ar(ti|it)cle ?(history|milestones)$", | ||||
r"failed ?ga$", | r"failed ?ga$", | ||||
r"old ?prod( ?full)?$", | r"old ?prod( ?full)?$", | ||||
@@ -144,10 +141,18 @@ class WikiProjectTagger(Task): | |||||
else: | else: | ||||
only_with = None | only_with = None | ||||
job = _Job(banner=banner, names=names, summary=summary, update=update, | |||||
append=append, autoassess=autoassess, only_with=only_with, | |||||
nocreate=nocreate, tag_categories=tag_categories, | |||||
dry_run=dry_run) | |||||
job = _Job( | |||||
banner=banner, | |||||
names=names, | |||||
summary=summary, | |||||
update=update, | |||||
append=append, | |||||
autoassess=autoassess, | |||||
only_with=only_with, | |||||
nocreate=nocreate, | |||||
tag_categories=tag_categories, | |||||
dry_run=dry_run, | |||||
) | |||||
try: | try: | ||||
self.run_job(kwargs, site, job, recursive) | self.run_job(kwargs, site, job, recursive) | ||||
@@ -165,7 +170,6 @@ class WikiProjectTagger(Task): | |||||
with open(kwargs["file"], "r") as fileobj: | with open(kwargs["file"], "r") as fileobj: | ||||
for line in fileobj: | for line in fileobj: | ||||
if line.strip(): | if line.strip(): | ||||
line = line.decode("utf8") | |||||
if line.startswith("[[") and line.endswith("]]"): | if line.startswith("[[") and line.endswith("]]"): | ||||
line = line[2:-2] | line = line[2:-2] | ||||
page = site.get_page(line) | page = site.get_page(line) | ||||
@@ -201,8 +205,13 @@ class WikiProjectTagger(Task): | |||||
return banner, None | return banner, None | ||||
names = {banner, title} | names = {banner, title} | ||||
result = site.api_query(action="query", list="backlinks", bllimit=500, | |||||
blfilterredir="redirects", bltitle=title) | |||||
result = site.api_query( | |||||
action="query", | |||||
list="backlinks", | |||||
bllimit=500, | |||||
blfilterredir="redirects", | |||||
bltitle=title, | |||||
) | |||||
for backlink in result["query"]["backlinks"]: | for backlink in result["query"]["backlinks"]: | ||||
names.add(backlink["title"]) | names.add(backlink["title"]) | ||||
if backlink["ns"] == constants.NS_TEMPLATE: | if backlink["ns"] == constants.NS_TEMPLATE: | ||||
@@ -215,8 +224,9 @@ class WikiProjectTagger(Task): | |||||
def process_category(self, page, job, recursive): | def process_category(self, page, job, recursive): | ||||
"""Try to tag all pages in the given category.""" | """Try to tag all pages in the given category.""" | ||||
if page.title in job.processed_cats: | if page.title in job.processed_cats: | ||||
self.logger.debug("Skipping category, already processed: [[%s]]", | |||||
page.title) | |||||
self.logger.debug( | |||||
"Skipping category, already processed: [[%s]]", page.title | |||||
) | |||||
return | return | ||||
self.logger.info("Processing category: [[%s]]", page.title) | self.logger.info("Processing category: [[%s]]", page.title) | ||||
job.processed_cats.add(page.title) | job.processed_cats.add(page.title) | ||||
@@ -243,8 +253,7 @@ class WikiProjectTagger(Task): | |||||
page = page.toggle_talk() | page = page.toggle_talk() | ||||
if page.title in job.processed_pages: | if page.title in job.processed_pages: | ||||
self.logger.debug("Skipping page, already processed: [[%s]]", | |||||
page.title) | |||||
self.logger.debug("Skipping page, already processed: [[%s]]", page.title) | |||||
return | return | ||||
job.processed_pages.add(page.title) | job.processed_pages.add(page.title) | ||||
@@ -275,21 +284,22 @@ class WikiProjectTagger(Task): | |||||
return | return | ||||
if job.only_with: | if job.only_with: | ||||
if not any(template.name.matches(job.only_with) | |||||
for template in code.ifilter_templates(recursive=True)): | |||||
if not any( | |||||
template.name.matches(job.only_with) | |||||
for template in code.ifilter_templates(recursive=True) | |||||
): | |||||
log = "Skipping page: [[%s]]; fails only-with condition" | log = "Skipping page: [[%s]]; fails only-with condition" | ||||
self.logger.info(log, page.title) | self.logger.info(log, page.title) | ||||
return | return | ||||
if is_update: | if is_update: | ||||
old_banner = str(banner) | |||||
self.update_banner(banner, job, code) | |||||
if banner == old_banner: | |||||
updated = self.update_banner(banner, job, code) | |||||
if not updated: | |||||
log = "Skipping page: [[%s]]; already tagged and no updates" | log = "Skipping page: [[%s]]; already tagged and no updates" | ||||
self.logger.info(log, page.title) | self.logger.info(log, page.title) | ||||
return | return | ||||
self.logger.info("Updating banner on page: [[%s]]", page.title) | self.logger.info("Updating banner on page: [[%s]]", page.title) | ||||
banner = banner.encode("utf8") | |||||
banner = str(banner) | |||||
else: | else: | ||||
self.logger.info("Tagging page: [[%s]]", page.title) | self.logger.info("Tagging page: [[%s]]", page.title) | ||||
banner = self.make_banner(job, code) | banner = self.make_banner(job, code) | ||||
@@ -334,9 +344,11 @@ class WikiProjectTagger(Task): | |||||
def update_banner(self, banner, job, code): | def update_banner(self, banner, job, code): | ||||
"""Update an existing *banner* based on a *job* and a page's *code*.""" | """Update an existing *banner* based on a *job* and a page's *code*.""" | ||||
has = lambda key: (banner.has(key) and | |||||
banner.get(key).value.strip() not in ("", "?")) | |||||
has = lambda key: ( | |||||
banner.has(key) and banner.get(key).value.strip() not in ("", "?") | |||||
) | |||||
updated = False | |||||
if job.autoassess is not False: | if job.autoassess is not False: | ||||
if not has("class"): | if not has("class"): | ||||
assess, reason = self.get_autoassessment(code, job.autoassess) | assess, reason = self.get_autoassessment(code, job.autoassess) | ||||
@@ -349,6 +361,8 @@ class WikiProjectTagger(Task): | |||||
key, value = param.split("=", 1) | key, value = param.split("=", 1) | ||||
if not has(key): | if not has(key): | ||||
banner.add(key, value) | banner.add(key, value) | ||||
updated = True | |||||
return updated | |||||
def get_autoassessment(self, code, only_classes=None): | def get_autoassessment(self, code, only_classes=None): | ||||
"""Get an autoassessment for a page. | """Get an autoassessment for a page. | ||||
@@ -356,16 +370,27 @@ class WikiProjectTagger(Task): | |||||
Return (assessed class as a string or None, assessment reason or None). | Return (assessed class as a string or None, assessment reason or None). | ||||
""" | """ | ||||
if only_classes is None or only_classes is True: | if only_classes is None or only_classes is True: | ||||
classnames = ["a", "b", "book", "c", "dab", "fa", "fl", "ga", | |||||
"list", "redirect", "start", "stub"] | |||||
classnames = [ | |||||
"a", | |||||
"b", | |||||
"book", | |||||
"c", | |||||
"dab", | |||||
"fa", | |||||
"fl", | |||||
"ga", | |||||
"list", | |||||
"redirect", | |||||
"start", | |||||
"stub", | |||||
] | |||||
else: | else: | ||||
classnames = [klass.strip().lower() | |||||
for klass in only_classes.split(",")] | |||||
classnames = [klass.strip().lower() for klass in only_classes.split(",")] | |||||
classes = {klass: 0 for klass in classnames} | classes = {klass: 0 for klass in classnames} | ||||
for template in code.ifilter_templates(recursive=True): | for template in code.ifilter_templates(recursive=True): | ||||
if template.has("class"): | if template.has("class"): | ||||
value = str(template.get("class").value).lower() | |||||
value = str(template.get("class").value).strip().lower() | |||||
if value in classes: | if value in classes: | ||||
classes[value] += 1 | classes[value] += 1 | ||||
@@ -429,6 +454,7 @@ class WikiProjectTagger(Task): | |||||
self.logger.debug("Inserting banner at beginning") | self.logger.debug("Inserting banner at beginning") | ||||
code.insert(0, banner + "\n") | code.insert(0, banner + "\n") | ||||
class _Job: | class _Job: | ||||
"""Represents a single wikiproject-tagging task. | """Represents a single wikiproject-tagging task. | ||||
@@ -436,6 +462,7 @@ class _Job: | |||||
or not to autoassess and create new pages from scratch, and a counter of | or not to autoassess and create new pages from scratch, and a counter of | ||||
the number of pages edited. | the number of pages edited. | ||||
""" | """ | ||||
def __init__(self, **kwargs): | def __init__(self, **kwargs): | ||||
self.banner = kwargs["banner"] | self.banner = kwargs["banner"] | ||||
self.names = kwargs["names"] | self.names = kwargs["names"] | ||||
@@ -456,4 +483,5 @@ class _Job: | |||||
class _ShutoffEnabled(Exception): | class _ShutoffEnabled(Exception): | ||||
"""Raised by process_page() if shutoff is enabled. Caught by run(), which | """Raised by process_page() if shutoff is enabled. Caught by run(), which | ||||
will then stop the task.""" | will then stop the task.""" | ||||
pass | pass |
@@ -280,7 +280,7 @@ class Page(CopyvioMixIn): | |||||
self._assert_existence() | self._assert_existence() | ||||
def _edit(self, params=None, text=None, summary=None, minor=None, bot=None, | def _edit(self, params=None, text=None, summary=None, minor=None, bot=None, | ||||
force=None, section=None, captcha_id=None, captcha_word=None): | |||||
force=None, section=None, captcha_id=None, captcha_word=None, **kwargs): | |||||
"""Edit the page! | """Edit the page! | ||||
If *params* is given, we'll use it as our API query parameters. | If *params* is given, we'll use it as our API query parameters. | ||||
@@ -297,7 +297,7 @@ class Page(CopyvioMixIn): | |||||
# Build our API query string: | # Build our API query string: | ||||
if not params: | if not params: | ||||
params = self._build_edit_params(text, summary, minor, bot, force, | params = self._build_edit_params(text, summary, minor, bot, force, | ||||
section, captcha_id, captcha_word) | |||||
section, captcha_id, captcha_word, kwargs) | |||||
else: # Make sure we have the right token: | else: # Make sure we have the right token: | ||||
params["token"] = self.site.get_token() | params["token"] = self.site.get_token() | ||||
@@ -320,7 +320,7 @@ class Page(CopyvioMixIn): | |||||
raise exceptions.EditError(result["edit"]) | raise exceptions.EditError(result["edit"]) | ||||
def _build_edit_params(self, text, summary, minor, bot, force, section, | def _build_edit_params(self, text, summary, minor, bot, force, section, | ||||
captcha_id, captcha_word): | |||||
captcha_id, captcha_word, kwargs): | |||||
"""Given some keyword arguments, build an API edit query string.""" | """Given some keyword arguments, build an API edit query string.""" | ||||
unitxt = text.encode("utf8") if isinstance(text, str) else text | unitxt = text.encode("utf8") if isinstance(text, str) else text | ||||
hashed = md5(unitxt).hexdigest() # Checksum to ensure text is correct | hashed = md5(unitxt).hexdigest() # Checksum to ensure text is correct | ||||
@@ -351,6 +351,11 @@ class Page(CopyvioMixIn): | |||||
else: | else: | ||||
params["recreate"] = "true" | params["recreate"] = "true" | ||||
for key, val in kwargs.items(): | |||||
if val is None: | |||||
params.pop(key, None) | |||||
else: | |||||
params[key] = val | |||||
return params | return params | ||||
def _handle_edit_errors(self, error, params, retry=True): | def _handle_edit_errors(self, error, params, retry=True): | ||||
@@ -657,7 +662,7 @@ class Page(CopyvioMixIn): | |||||
""" | """ | ||||
return mwparserfromhell.parse(self.get()) | return mwparserfromhell.parse(self.get()) | ||||
def edit(self, text, summary, minor=False, bot=True, force=False): | |||||
def edit(self, text, summary, minor=False, bot=True, force=False, **kwargs): | |||||
"""Replace the page's content or creates a new page. | """Replace the page's content or creates a new page. | ||||
*text* is the new page content, with *summary* as the edit summary. | *text* is the new page content, with *summary* as the edit summary. | ||||
@@ -670,9 +675,9 @@ class Page(CopyvioMixIn): | |||||
editing our page. Be careful with this! | editing our page. Be careful with this! | ||||
""" | """ | ||||
self._edit(text=text, summary=summary, minor=minor, bot=bot, | self._edit(text=text, summary=summary, minor=minor, bot=bot, | ||||
force=force) | |||||
force=force, **kwargs) | |||||
def add_section(self, text, title, minor=False, bot=True, force=False): | |||||
def add_section(self, text, title, minor=False, bot=True, force=False, **kwargs): | |||||
"""Add a new section to the bottom of the page. | """Add a new section to the bottom of the page. | ||||
The arguments for this are the same as those for :py:meth:`edit`, but | The arguments for this are the same as those for :py:meth:`edit`, but | ||||
@@ -683,7 +688,7 @@ class Page(CopyvioMixIn): | |||||
new section as content. | new section as content. | ||||
""" | """ | ||||
self._edit(text=text, summary=title, minor=minor, bot=bot, force=force, | self._edit(text=text, summary=title, minor=minor, bot=bot, force=force, | ||||
section="new") | |||||
section="new", **kwargs) | |||||
def check_exclusion(self, username=None, optouts=None): | def check_exclusion(self, username=None, optouts=None): | ||||
"""Check whether or not we are allowed to edit the page. | """Check whether or not we are allowed to edit the page. | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2009-2021 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -37,10 +37,11 @@ from earwigbot.wiki.category import Category | |||||
from earwigbot.wiki.page import Page | from earwigbot.wiki.page import Page | ||||
from earwigbot.wiki.user import User | from earwigbot.wiki.user import User | ||||
oursql = importer.new("oursql") | |||||
pymysql = importer.new("pymysql") | |||||
__all__ = ["Site"] | __all__ = ["Site"] | ||||
class Site: | class Site: | ||||
""" | """ | ||||
**EarwigBot: Wiki Toolset: Site** | **EarwigBot: Wiki Toolset: Site** | ||||
@@ -80,18 +81,41 @@ class Site: | |||||
- :py:meth:`get_user`: returns a User object for the given name | - :py:meth:`get_user`: returns a User object for the given name | ||||
- :py:meth:`delegate`: controls when the API or SQL is used | - :py:meth:`delegate`: controls when the API or SQL is used | ||||
""" | """ | ||||
SERVICE_API = 1 | SERVICE_API = 1 | ||||
SERVICE_SQL = 2 | SERVICE_SQL = 2 | ||||
SPECIAL_TOKENS = ["createaccount", "deleteglobalaccount", "login", | |||||
"patrol", "rollback", "setglobalaccountstatus", | |||||
"userrights", "watch"] | |||||
def __init__(self, name=None, project=None, lang=None, base_url=None, | |||||
article_path=None, script_path=None, sql=None, | |||||
namespaces=None, login=(None, None), oauth=None, | |||||
cookiejar=None, user_agent=None, use_https=True, | |||||
assert_edit=None, maxlag=None, wait_between_queries=1, | |||||
logger=None, search_config=None): | |||||
SPECIAL_TOKENS = [ | |||||
"createaccount", | |||||
"deleteglobalaccount", | |||||
"login", | |||||
"patrol", | |||||
"rollback", | |||||
"setglobalaccountstatus", | |||||
"userrights", | |||||
"watch", | |||||
] | |||||
def __init__( | |||||
self, | |||||
name=None, | |||||
project=None, | |||||
lang=None, | |||||
base_url=None, | |||||
article_path=None, | |||||
script_path=None, | |||||
sql=None, | |||||
namespaces=None, | |||||
login=(None, None), | |||||
oauth=None, | |||||
cookiejar=None, | |||||
user_agent=None, | |||||
use_https=True, | |||||
assert_edit=None, | |||||
maxlag=None, | |||||
wait_between_queries=1, | |||||
logger=None, | |||||
search_config=None, | |||||
): | |||||
"""Constructor for new Site instances. | """Constructor for new Site instances. | ||||
This probably isn't necessary to call yourself unless you're building a | This probably isn't necessary to call yourself unless you're building a | ||||
@@ -160,8 +184,11 @@ class Site: | |||||
self._session.headers["User-Agent"] = user_agent | self._session.headers["User-Agent"] = user_agent | ||||
if oauth: | if oauth: | ||||
self._session.auth = OAuth1( | self._session.auth = OAuth1( | ||||
oauth["consumer_token"], oauth["consumer_secret"], | |||||
oauth["access_token"], oauth["access_secret"]) | |||||
oauth["consumer_token"], | |||||
oauth["consumer_secret"], | |||||
oauth["access_token"], | |||||
oauth["access_secret"], | |||||
) | |||||
# Set up our internal logger: | # Set up our internal logger: | ||||
if logger: | if logger: | ||||
@@ -182,13 +209,24 @@ class Site: | |||||
def __repr__(self): | def __repr__(self): | ||||
"""Return the canonical string representation of the Site.""" | """Return the canonical string representation of the Site.""" | ||||
res = ", ".join(( | |||||
"Site(name={_name!r}", "project={_project!r}", "lang={_lang!r}", | |||||
"base_url={_base_url!r}", "article_path={_article_path!r}", | |||||
"script_path={_script_path!r}", "use_https={_use_https!r}", | |||||
"assert_edit={_assert_edit!r}", "maxlag={_maxlag!r}", | |||||
"sql={_sql_data!r}", "login={0}", "oauth={1}", "user_agent={3!r}", | |||||
"cookiejar={2})")) | |||||
res = ", ".join( | |||||
( | |||||
"Site(name={_name!r}", | |||||
"project={_project!r}", | |||||
"lang={_lang!r}", | |||||
"base_url={_base_url!r}", | |||||
"article_path={_article_path!r}", | |||||
"script_path={_script_path!r}", | |||||
"use_https={_use_https!r}", | |||||
"assert_edit={_assert_edit!r}", | |||||
"maxlag={_maxlag!r}", | |||||
"sql={_sql_data!r}", | |||||
"login={0}", | |||||
"oauth={1}", | |||||
"user_agent={3!r}", | |||||
"cookiejar={2})", | |||||
) | |||||
) | |||||
name, password = self._login_info | name, password = self._login_info | ||||
login = "({0}, {1})".format(repr(name), "hidden" if password else None) | login = "({0}, {1})".format(repr(name), "hidden" if password else None) | ||||
oauth = "hidden" if self._oauth else None | oauth = "hidden" if self._oauth else None | ||||
@@ -211,8 +249,15 @@ class Site: | |||||
return value | return value | ||||
return str(value, encoding) | return str(value, encoding) | ||||
def _api_query(self, params, tries=0, wait=5, ignore_maxlag=False, | |||||
no_assert=False, ae_retry=True): | |||||
def _api_query( | |||||
self, | |||||
params, | |||||
tries=0, | |||||
wait=5, | |||||
ignore_maxlag=False, | |||||
no_assert=False, | |||||
ae_retry=True, | |||||
): | |||||
"""Do an API query with *params* as a dict of parameters. | """Do an API query with *params* as a dict of parameters. | ||||
See the documentation for :py:meth:`api_query` for full implementation | See the documentation for :py:meth:`api_query` for full implementation | ||||
@@ -348,8 +393,14 @@ class Site: | |||||
""" | """ | ||||
# All attributes to be loaded, except _namespaces, which is a special | # All attributes to be loaded, except _namespaces, which is a special | ||||
# case because it requires additional params in the API query: | # case because it requires additional params in the API query: | ||||
attrs = [self._name, self._project, self._lang, self._base_url, | |||||
self._article_path, self._script_path] | |||||
attrs = [ | |||||
self._name, | |||||
self._project, | |||||
self._lang, | |||||
self._base_url, | |||||
self._article_path, | |||||
self._script_path, | |||||
] | |||||
params = {"action": "query", "meta": "siteinfo", "siprop": "general"} | params = {"action": "query", "meta": "siteinfo", "siprop": "general"} | ||||
@@ -359,7 +410,7 @@ class Site: | |||||
result = self._api_query(params, no_assert=True) | result = self._api_query(params, no_assert=True) | ||||
self._load_namespaces(result) | self._load_namespaces(result) | ||||
elif all(attrs): # Everything is already specified and we're not told | elif all(attrs): # Everything is already specified and we're not told | ||||
return # to force a reload, so do nothing | |||||
return # to force a reload, so do nothing | |||||
else: # We're only loading attributes other than _namespaces | else: # We're only loading attributes other than _namespaces | ||||
with self._api_lock: | with self._api_lock: | ||||
result = self._api_query(params, no_assert=True) | result = self._api_query(params, no_assert=True) | ||||
@@ -424,11 +475,11 @@ class Site: | |||||
(for that, we'd do self._login_info[0]), but rather to get our current | (for that, we'd do self._login_info[0]), but rather to get our current | ||||
username without an unnecessary ?action=query&meta=userinfo API query. | username without an unnecessary ?action=query&meta=userinfo API query. | ||||
""" | """ | ||||
name = ''.join((self._name, "Token")) | |||||
name = "".join((self._name, "Token")) | |||||
cookie = self._get_cookie(name, self.domain) | cookie = self._get_cookie(name, self.domain) | ||||
if cookie: | if cookie: | ||||
name = ''.join((self._name, "UserName")) | |||||
name = "".join((self._name, "UserName")) | |||||
user_name = self._get_cookie(name, self.domain) | user_name = self._get_cookie(name, self.domain) | ||||
if user_name: | if user_name: | ||||
return unquote_plus(user_name.value) | return unquote_plus(user_name.value) | ||||
@@ -528,8 +579,12 @@ class Site: | |||||
except KeyError: | except KeyError: | ||||
raise exceptions.LoginError("Couldn't get login token") | raise exceptions.LoginError("Couldn't get login token") | ||||
params = {"action": "login", "lgname": name, "lgpassword": password, | |||||
"lgtoken": token} | |||||
params = { | |||||
"action": "login", | |||||
"lgname": name, | |||||
"lgpassword": password, | |||||
"lgtoken": token, | |||||
} | |||||
with self._api_lock: | with self._api_lock: | ||||
result = self._api_query(params, no_assert=True) | result = self._api_query(params, no_assert=True) | ||||
@@ -564,18 +619,22 @@ class Site: | |||||
def _sql_connect(self, **kwargs): | def _sql_connect(self, **kwargs): | ||||
"""Attempt to establish a connection with this site's SQL database. | """Attempt to establish a connection with this site's SQL database. | ||||
oursql.connect() will be called with self._sql_data as its kwargs. | |||||
pymysql.connect() will be called with self._sql_data as its kwargs. | |||||
Any kwargs given to this function will be passed to connect() and will | Any kwargs given to this function will be passed to connect() and will | ||||
have precedence over the config file. | have precedence over the config file. | ||||
Will raise SQLError() if the module "oursql" is not available. oursql | |||||
may raise its own exceptions (e.g. oursql.InterfaceError) if it cannot | |||||
Will raise SQLError() if the module "pymysql" is not available. pymysql | |||||
may raise its own exceptions (e.g. pymysql.InterfaceError) if it cannot | |||||
establish a connection. | establish a connection. | ||||
""" | """ | ||||
args = self._sql_data | args = self._sql_data | ||||
for key, value in kwargs.items(): | for key, value in kwargs.items(): | ||||
args[key] = value | args[key] = value | ||||
if "read_default_file" not in args and "user" not in args and "passwd" not in args: | |||||
if ( | |||||
"read_default_file" not in args | |||||
and "user" not in args | |||||
and "passwd" not in args | |||||
): | |||||
args["read_default_file"] = expanduser("~/.my.cnf") | args["read_default_file"] = expanduser("~/.my.cnf") | ||||
elif "read_default_file" in args: | elif "read_default_file" in args: | ||||
args["read_default_file"] = expanduser(args["read_default_file"]) | args["read_default_file"] = expanduser(args["read_default_file"]) | ||||
@@ -585,9 +644,9 @@ class Site: | |||||
args["autoreconnect"] = True | args["autoreconnect"] = True | ||||
try: | try: | ||||
self._sql_conn = oursql.connect(**args) | |||||
self._sql_conn = pymysql.connect(**args) | |||||
except ImportError: | except ImportError: | ||||
e = "SQL querying requires the 'oursql' package: https://pythonhosted.org/oursql/" | |||||
e = "SQL querying requires the 'pymysql' package: https://pymysql.readthedocs.io/" | |||||
raise exceptions.SQLError(e) | raise exceptions.SQLError(e) | ||||
def _get_service_order(self): | def _get_service_order(self): | ||||
@@ -608,8 +667,11 @@ class Site: | |||||
if now - self._sql_info_cache["lastcheck"] > 120: | if now - self._sql_info_cache["lastcheck"] > 120: | ||||
self._sql_info_cache["lastcheck"] = now | self._sql_info_cache["lastcheck"] = now | ||||
try: | try: | ||||
self._sql_info_cache["replag"] = sqllag = self.get_replag() | |||||
except (exceptions.SQLError, oursql.Error): | |||||
try: | |||||
self._sql_info_cache["replag"] = sqllag = self.get_replag() | |||||
except pymysql.Error as exc: | |||||
raise exceptions.SQLError(str(exc)) | |||||
except (exceptions.SQLError, ImportError): | |||||
self._sql_info_cache["usable"] = False | self._sql_info_cache["usable"] = False | ||||
return [self.SERVICE_API] | return [self.SERVICE_API] | ||||
self._sql_info_cache["usable"] = True | self._sql_info_cache["usable"] = True | ||||
@@ -705,24 +767,31 @@ class Site: | |||||
with self._api_lock: | with self._api_lock: | ||||
return self._api_query(kwargs) | return self._api_query(kwargs) | ||||
def sql_query(self, query, params=(), plain_query=False, dict_cursor=False, | |||||
cursor_class=None, show_table=False, buffsize=1024): | |||||
def sql_query( | |||||
self, | |||||
query, | |||||
params=(), | |||||
plain_query=False, | |||||
dict_cursor=False, | |||||
cursor_class=None, | |||||
buffsize=1024, | |||||
): | |||||
"""Do an SQL query and yield its results. | """Do an SQL query and yield its results. | ||||
If *plain_query* is ``True``, we will force an unparameterized query. | If *plain_query* is ``True``, we will force an unparameterized query. | ||||
Specifying both *params* and *plain_query* will cause an error. If | Specifying both *params* and *plain_query* will cause an error. If | ||||
*dict_cursor* is ``True``, we will use :py:class:`oursql.DictCursor` as | |||||
our cursor, otherwise the default :py:class:`oursql.Cursor`. If | |||||
*cursor_class* is given, it will override this option. If *show_table* | |||||
is True, the name of the table will be prepended to the name of the | |||||
column. This will mainly affect an :py:class:`~oursql.DictCursor`. | |||||
*dict_cursor* is ``True``, we will use | |||||
:py:class:`pymysql.cursors.DictCursor` as our cursor, otherwise the | |||||
default :py:class:`pymysql.cursors.Cursor`. If *cursor_class* is given, | |||||
it will override this option. | |||||
*buffsize* is the size of each memory-buffered group of results, to | *buffsize* is the size of each memory-buffered group of results, to | ||||
reduce the number of conversations with the database; it is passed to | reduce the number of conversations with the database; it is passed to | ||||
:py:meth:`cursor.fetchmany() <oursql.Cursor.fetchmany>`. If set to | |||||
``0```, all results will be buffered in memory at once (this uses | |||||
:py:meth:`fetchall() <oursql.Cursor.fetchall>`). If set to ``1``, it is | |||||
equivalent to using :py:meth:`fetchone() <oursql.Cursor.fetchone>`. | |||||
:py:meth:`cursor.fetchmany() <pymysql.cursors.Cursor.fetchmany>`. If | |||||
set to ``0``, all results will be buffered in memory at once (this | |||||
uses :py:meth:`fetchall() <pymysql.cursors.Cursor.fetchall>`). If set | |||||
to ``1``, it is equivalent to using | |||||
:py:meth:`fetchone() <pymysql.cursors.Cursor.fetchone>`. | |||||
Example usage:: | Example usage:: | ||||
@@ -736,25 +805,25 @@ class Site: | |||||
{'user_id': 7418060, 'user_registration': '20080703215134'} | {'user_id': 7418060, 'user_registration': '20080703215134'} | ||||
This may raise :py:exc:`~earwigbot.exceptions.SQLError` or one of | This may raise :py:exc:`~earwigbot.exceptions.SQLError` or one of | ||||
oursql's exceptions (:py:exc:`oursql.ProgrammingError`, | |||||
:py:exc:`oursql.InterfaceError`, ...) if there were problems with the | |||||
pymysql's exceptions (:py:exc:`pymysql.ProgrammingError`, | |||||
:py:exc:`pymysql.InterfaceError`, ...) if there were problems with the | |||||
query. | query. | ||||
See :py:meth:`_sql_connect` for information on how a connection is | See :py:meth:`_sql_connect` for information on how a connection is | ||||
acquired. Also relevant is `oursql's documentation | |||||
<https://pythonhosted.org/oursql/>`_ for details on that package. | |||||
acquired. Also relevant is `pymysql's documentation | |||||
<https://pymysql.readthedocs.io/>`_ for details on that package. | |||||
""" | """ | ||||
if not cursor_class: | if not cursor_class: | ||||
if dict_cursor: | if dict_cursor: | ||||
cursor_class = oursql.DictCursor | |||||
cursor_class = pymysql.cursors.DictCursor | |||||
else: | else: | ||||
cursor_class = oursql.Cursor | |||||
cursor_class = pymysql.cursors.Cursor | |||||
klass = cursor_class | klass = cursor_class | ||||
with self._sql_lock: | with self._sql_lock: | ||||
if not self._sql_conn: | if not self._sql_conn: | ||||
self._sql_connect() | self._sql_connect() | ||||
with self._sql_conn.cursor(klass, show_table=show_table) as cur: | |||||
with self._sql_conn.cursor(klass) as cur: | |||||
cur.execute(query, params, plain_query) | cur.execute(query, params, plain_query) | ||||
if buffsize: | if buffsize: | ||||
while True: | while True: | ||||
@@ -798,8 +867,8 @@ class Site: | |||||
time from the timestamp of the latest recent changes event. | time from the timestamp of the latest recent changes event. | ||||
This may raise :py:exc:`~earwigbot.exceptions.SQLError` or one of | This may raise :py:exc:`~earwigbot.exceptions.SQLError` or one of | ||||
oursql's exceptions (:py:exc:`oursql.ProgrammingError`, | |||||
:py:exc:`oursql.InterfaceError`, ...) if there were problems. | |||||
pymysql's exceptions (:py:exc:`pymysql.ProgrammingError`, | |||||
:py:exc:`pymysql.InterfaceError`, ...) if there were problems. | |||||
""" | """ | ||||
query = """SELECT UNIX_TIMESTAMP() - UNIX_TIMESTAMP(rc_timestamp) FROM | query = """SELECT UNIX_TIMESTAMP() - UNIX_TIMESTAMP(rc_timestamp) FROM | ||||
recentchanges ORDER BY rc_timestamp DESC LIMIT 1""" | recentchanges ORDER BY rc_timestamp DESC LIMIT 1""" | ||||
@@ -886,8 +955,7 @@ class Site: | |||||
prefix = title.split(":", 1)[0] | prefix = title.split(":", 1)[0] | ||||
if prefix != title: # Avoid a page that is simply "Category" | if prefix != title: # Avoid a page that is simply "Category" | ||||
if prefix in prefixes: | if prefix in prefixes: | ||||
return Category(self, title, follow_redirects, pageid, | |||||
self._logger) | |||||
return Category(self, title, follow_redirects, pageid, self._logger) | |||||
return Page(self, title, follow_redirects, pageid, self._logger) | return Page(self, title, follow_redirects, pageid, self._logger) | ||||
def get_category(self, catname, follow_redirects=False, pageid=None): | def get_category(self, catname, follow_redirects=False, pageid=None): | ||||
@@ -899,7 +967,7 @@ class Site: | |||||
""" | """ | ||||
catname = self._unicodeify(catname) | catname = self._unicodeify(catname) | ||||
prefix = self.namespace_id_to_name(constants.NS_CATEGORY) | prefix = self.namespace_id_to_name(constants.NS_CATEGORY) | ||||
pagename = ':'.join((prefix, catname)) | |||||
pagename = ":".join((prefix, catname)) | |||||
return Category(self, pagename, follow_redirects, pageid, self._logger) | return Category(self, pagename, follow_redirects, pageid, self._logger) | ||||
def get_user(self, username=None): | def get_user(self, username=None): | ||||
@@ -1,6 +1,6 @@ | |||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2009-2021 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -35,6 +35,7 @@ from earwigbot.wiki.site import Site | |||||
__all__ = ["SitesDB"] | __all__ = ["SitesDB"] | ||||
class SitesDB: | class SitesDB: | ||||
""" | """ | ||||
**EarwigBot: Wiki Toolset: Sites Database Manager** | **EarwigBot: Wiki Toolset: Sites Database Manager** | ||||
@@ -106,7 +107,7 @@ class SitesDB: | |||||
# Create the file and restrict reading/writing only to the | # Create the file and restrict reading/writing only to the | ||||
# owner, so others can't peek at our cookies: | # owner, so others can't peek at our cookies: | ||||
open(self._cookie_file, "w").close() | open(self._cookie_file, "w").close() | ||||
chmod(self._cookie_file, stat.S_IRUSR|stat.S_IWUSR) | |||||
chmod(self._cookie_file, stat.S_IRUSR | stat.S_IWUSR) | |||||
else: | else: | ||||
raise | raise | ||||
@@ -172,8 +173,16 @@ class SitesDB: | |||||
except KeyError: | except KeyError: | ||||
namespaces[ns_id] = [ns_name] | namespaces[ns_id] = [ns_name] | ||||
return (name, project, lang, base_url, article_path, script_path, sql, | |||||
namespaces) | |||||
return ( | |||||
name, | |||||
project, | |||||
lang, | |||||
base_url, | |||||
article_path, | |||||
script_path, | |||||
sql, | |||||
namespaces, | |||||
) | |||||
def _make_site_object(self, name): | def _make_site_object(self, name): | ||||
"""Return a Site object associated with the site *name* in our sitesdb. | """Return a Site object associated with the site *name* in our sitesdb. | ||||
@@ -182,8 +191,9 @@ class SitesDB: | |||||
raised if the site is not in our sitesdb. | raised if the site is not in our sitesdb. | ||||
""" | """ | ||||
cookiejar = self._get_cookiejar() | cookiejar = self._get_cookiejar() | ||||
(name, project, lang, base_url, article_path, script_path, sql, | |||||
namespaces) = self._load_site_from_sitesdb(name) | |||||
(name, project, lang, base_url, article_path, script_path, sql, namespaces) = ( | |||||
self._load_site_from_sitesdb(name) | |||||
) | |||||
config = self.config | config = self.config | ||||
login = (config.wiki.get("username"), config.wiki.get("password")) | login = (config.wiki.get("username"), config.wiki.get("password")) | ||||
@@ -211,13 +221,26 @@ class SitesDB: | |||||
if isinstance(value, str) and "$1" in value: | if isinstance(value, str) and "$1" in value: | ||||
sql[key] = value.replace("$1", name) | sql[key] = value.replace("$1", name) | ||||
return Site(name=name, project=project, lang=lang, base_url=base_url, | |||||
article_path=article_path, script_path=script_path, | |||||
sql=sql, namespaces=namespaces, login=login, oauth=oauth, | |||||
cookiejar=cookiejar, user_agent=user_agent, | |||||
use_https=use_https, assert_edit=assert_edit, | |||||
maxlag=maxlag, wait_between_queries=wait_between_queries, | |||||
logger=logger, search_config=search_config) | |||||
return Site( | |||||
name=name, | |||||
project=project, | |||||
lang=lang, | |||||
base_url=base_url, | |||||
article_path=article_path, | |||||
script_path=script_path, | |||||
sql=sql, | |||||
namespaces=namespaces, | |||||
login=login, | |||||
oauth=oauth, | |||||
cookiejar=cookiejar, | |||||
user_agent=user_agent, | |||||
use_https=use_https, | |||||
assert_edit=assert_edit, | |||||
maxlag=maxlag, | |||||
wait_between_queries=wait_between_queries, | |||||
logger=logger, | |||||
search_config=search_config, | |||||
) | |||||
def _get_site_name_from_sitesdb(self, project, lang): | def _get_site_name_from_sitesdb(self, project, lang): | ||||
"""Return the name of the first site with the given project and lang. | """Return the name of the first site with the given project and lang. | ||||
@@ -255,8 +278,14 @@ class SitesDB: | |||||
database. If the sitesdb doesn't exist, we'll create it first. | database. If the sitesdb doesn't exist, we'll create it first. | ||||
""" | """ | ||||
name = site.name | name = site.name | ||||
sites_data = (name, site.project, site.lang, site._base_url, | |||||
site._article_path, site._script_path) | |||||
sites_data = ( | |||||
name, | |||||
site.project, | |||||
site.lang, | |||||
site._base_url, | |||||
site._article_path, | |||||
site._script_path, | |||||
) | |||||
sql_data = [(name, key, val) for key, val in site._sql_data.items()] | sql_data = [(name, key, val) for key, val in site._sql_data.items()] | ||||
ns_data = [] | ns_data = [] | ||||
for ns_id, ns_names in site._namespaces.items(): | for ns_id, ns_names in site._namespaces.items(): | ||||
@@ -353,8 +382,9 @@ class SitesDB: | |||||
e = "Site '{0}:{1}' not found in the sitesdb.".format(project, lang) | e = "Site '{0}:{1}' not found in the sitesdb.".format(project, lang) | ||||
raise SiteNotFoundError(e) | raise SiteNotFoundError(e) | ||||
def add_site(self, project=None, lang=None, base_url=None, | |||||
script_path="/w", sql=None): | |||||
def add_site( | |||||
self, project=None, lang=None, base_url=None, script_path="/w", sql=None | |||||
): | |||||
"""Add a site to the sitesdb so it can be retrieved with get_site(). | """Add a site to the sitesdb so it can be retrieved with get_site(). | ||||
If only a project and a lang are given, we'll guess the *base_url* as | If only a project and a lang are given, we'll guess the *base_url* as | ||||
@@ -368,8 +398,8 @@ class SitesDB: | |||||
your wiki is different, provide the script_path as an argument. SQL | your wiki is different, provide the script_path as an argument. SQL | ||||
connection settings are guessed automatically using config's template | connection settings are guessed automatically using config's template | ||||
value. If this is wrong or not specified, provide a dict of kwargs as | value. If this is wrong or not specified, provide a dict of kwargs as | ||||
*sql* and Site will pass it to :py:func:`oursql.connect(**sql) | |||||
<oursql.connect>`, allowing you to make queries with | |||||
*sql* and Site will pass it to :py:func:`pymysql.connect(**sql) | |||||
<pymysql.connect>`, allowing you to make queries with | |||||
:py:meth:`site.sql_query <earwigbot.wiki.site.Site.sql_query>`. | :py:meth:`site.sql_query <earwigbot.wiki.site.Site.sql_query>`. | ||||
Returns ``True`` if the site was added successfully or ``False`` if the | Returns ``True`` if the site was added successfully or ``False`` if the | ||||
@@ -399,11 +429,19 @@ class SitesDB: | |||||
user_agent = user_agent.replace("$2", python_version()) | user_agent = user_agent.replace("$2", python_version()) | ||||
# Create a Site object to log in and load the other attributes: | # Create a Site object to log in and load the other attributes: | ||||
site = Site(base_url=base_url, script_path=script_path, sql=sql, | |||||
login=login, oauth=oauth, cookiejar=cookiejar, | |||||
user_agent=user_agent, use_https=use_https, | |||||
assert_edit=assert_edit, maxlag=maxlag, | |||||
wait_between_queries=wait_between_queries) | |||||
site = Site( | |||||
base_url=base_url, | |||||
script_path=script_path, | |||||
sql=sql, | |||||
login=login, | |||||
oauth=oauth, | |||||
cookiejar=cookiejar, | |||||
user_agent=user_agent, | |||||
use_https=use_https, | |||||
assert_edit=assert_edit, | |||||
maxlag=maxlag, | |||||
wait_between_queries=wait_between_queries, | |||||
) | |||||
self._logger.info("Added site '{0}'".format(site.name)) | self._logger.info("Added site '{0}'".format(site.name)) | ||||
self._add_site_to_sitesdb(site) | self._add_site_to_sitesdb(site) | ||||
@@ -1,7 +1,7 @@ | |||||
#! /usr/bin/env python | #! /usr/bin/env python | ||||
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
# | # | ||||
# Copyright (C) 2009-2021 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com> | |||||
# | # | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | # Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | # of this software and associated documentation files (the "Software"), to deal | ||||
@@ -37,11 +37,11 @@ extra_deps = { | |||||
"cryptography >= 3.4.7", # Storing bot passwords + keys in the config file | "cryptography >= 3.4.7", # Storing bot passwords + keys in the config file | ||||
], | ], | ||||
"sql": [ | "sql": [ | ||||
"oursql3 >= 0.9.4", # Interfacing with MediaWiki databases | |||||
"pymysql >= 1.1.0", # Interfacing with MediaWiki databases | |||||
], | ], | ||||
"copyvios": [ | "copyvios": [ | ||||
"beautifulsoup4 >= 4.9.3", # Parsing/scraping HTML | "beautifulsoup4 >= 4.9.3", # Parsing/scraping HTML | ||||
"cchardet >= 2.1.7", # Encoding detection for BeautifulSoup | |||||
"charset_normalizer >= 3.3.2", # Encoding detection for BeautifulSoup | |||||
"lxml >= 4.6.3", # Faster parser for BeautifulSoup | "lxml >= 4.6.3", # Faster parser for BeautifulSoup | ||||
"nltk >= 3.6.1", # Parsing sentences to split article content | "nltk >= 3.6.1", # Parsing sentences to split article content | ||||
"pdfminer >= 20191125", # Extracting text from PDF files | "pdfminer >= 20191125", # Extracting text from PDF files | ||||
@@ -58,21 +58,21 @@ with open("README.rst") as fp: | |||||
long_docs = fp.read() | long_docs = fp.read() | ||||
setup( | setup( | ||||
name = "earwigbot", | |||||
packages = find_packages(exclude=("tests",)), | |||||
entry_points = {"console_scripts": ["earwigbot = earwigbot.util:main"]}, | |||||
install_requires = dependencies, | |||||
test_suite = "tests", | |||||
version = __version__, | |||||
author = "Ben Kurtovic", | |||||
author_email = "ben.kurtovic@gmail.com", | |||||
url = "https://github.com/earwig/earwigbot", | |||||
description = "EarwigBot is a Python robot that edits Wikipedia and interacts with people over IRC.", | |||||
long_description = long_docs, | |||||
download_url = "https://github.com/earwig/earwigbot/tarball/v{0}".format(__version__), | |||||
keywords = "earwig earwigbot irc wikipedia wiki mediawiki", | |||||
license = "MIT License", | |||||
classifiers = [ | |||||
name="earwigbot", | |||||
packages=find_packages(exclude=("tests",)), | |||||
entry_points={"console_scripts": ["earwigbot = earwigbot.util:main"]}, | |||||
install_requires=dependencies, | |||||
test_suite="tests", | |||||
version=__version__, | |||||
author="Ben Kurtovic", | |||||
author_email="ben.kurtovic@gmail.com", | |||||
url="https://github.com/earwig/earwigbot", | |||||
description="EarwigBot is a Python robot that edits Wikipedia and interacts with people over IRC.", | |||||
long_description=long_docs, | |||||
download_url="https://github.com/earwig/earwigbot/tarball/v{0}".format(__version__), | |||||
keywords="earwig earwigbot irc wikipedia wiki mediawiki", | |||||
license="MIT License", | |||||
classifiers=[ | |||||
"Development Status :: 3 - Alpha", | "Development Status :: 3 - Alpha", | ||||
"Environment :: Console", | "Environment :: Console", | ||||
"Intended Audience :: Developers", | "Intended Audience :: Developers", | ||||
@@ -81,6 +81,6 @@ setup( | |||||
"Operating System :: OS Independent", | "Operating System :: OS Independent", | ||||
"Programming Language :: Python :: 3", | "Programming Language :: Python :: 3", | ||||
"Topic :: Communications :: Chat :: Internet Relay Chat", | "Topic :: Communications :: Chat :: Internet Relay Chat", | ||||
"Topic :: Internet :: WWW/HTTP" | |||||
"Topic :: Internet :: WWW/HTTP", | |||||
], | ], | ||||
) | ) |