Browse Source

Python 3.11+ cleanup and bugfixes

tags/v0.4
Ben Kurtovic 7 months ago
parent
commit
10bc4b3fd4
10 changed files with 335 additions and 184 deletions
  1. +3
    -1
      CHANGELOG
  2. +19
    -16
      earwigbot/irc/watcher.py
  3. +8
    -7
      earwigbot/lazy.py
  4. +24
    -18
      earwigbot/managers.py
  5. +1
    -1
      earwigbot/tasks/__init__.py
  6. +58
    -30
      earwigbot/tasks/wikiproject_tagger.py
  7. +12
    -7
      earwigbot/wiki/page.py
  8. +129
    -61
      earwigbot/wiki/site.py
  9. +62
    -24
      earwigbot/wiki/sitesdb.py
  10. +19
    -19
      setup.py

+ 3
- 1
CHANGELOG View File

@@ -1,12 +1,14 @@
v0.4 (unreleased): v0.4 (unreleased):


- Migrated to Python 3.
- Migrated to Python 3 (3.11+).
- Migrated from oursql to pymysql.
- Copyvios: Configurable proxy support for specific domains. - Copyvios: Configurable proxy support for specific domains.
- Copyvios: Parser-directed URL redirection. - Copyvios: Parser-directed URL redirection.
- Copyvios: General parsing improvements. - Copyvios: General parsing improvements.
- Copyvios: URL exclusion improvements. - Copyvios: URL exclusion improvements.
- Copyvios: Removed long-deprecated Yahoo! BOSS search engine. - Copyvios: Removed long-deprecated Yahoo! BOSS search engine.
- Wiki: Fixed not sending Content-Type header in POST requests. - Wiki: Fixed not sending Content-Type header in POST requests.
- IRC: Moved default server from Freenode to Libera.
- IRC: Remember joined channels across restarts. - IRC: Remember joined channels across restarts.
- IRC: Added !listchans. - IRC: Added !listchans.
- IRC > !stalk: Added modifiers to change message format or filter messages. - IRC > !stalk: Added modifiers to change message format or filter messages.


+ 19
- 16
earwigbot/irc/watcher.py View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# Copyright (C) 2009-2015 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com>
# #
# Permission is hereby granted, free of charge, to any person obtaining a copy # Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal # of this software and associated documentation files (the "Software"), to deal
@@ -20,12 +20,14 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE. # SOFTWARE.


import imp
import importlib.machinery
import importlib.util


from earwigbot.irc import IRCConnection, RC from earwigbot.irc import IRCConnection, RC


__all__ = ["Watcher"] __all__ = ["Watcher"]



class Watcher(IRCConnection): class Watcher(IRCConnection):
""" """
**EarwigBot: IRC Watcher Component** **EarwigBot: IRC Watcher Component**
@@ -40,16 +42,23 @@ class Watcher(IRCConnection):
def __init__(self, bot): def __init__(self, bot):
self.bot = bot self.bot = bot
cf = bot.config.irc["watcher"] cf = bot.config.irc["watcher"]
super().__init__(cf["host"], cf["port"], cf["nick"], cf["ident"],
cf["realname"], bot.logger.getChild("watcher"))
super().__init__(
cf["host"],
cf["port"],
cf["nick"],
cf["ident"],
cf["realname"],
bot.logger.getChild("watcher"),
)
self._prepare_process_hook() self._prepare_process_hook()
self._connect() self._connect()


def __repr__(self): def __repr__(self):
"""Return the canonical string representation of the Watcher.""" """Return the canonical string representation of the Watcher."""
res = "Watcher(host={0!r}, port={1!r}, nick={2!r}, ident={3!r}, realname={4!r}, bot={5!r})" res = "Watcher(host={0!r}, port={1!r}, nick={2!r}, ident={3!r}, realname={4!r}, bot={5!r})"
return res.format(self.host, self.port, self.nick, self.ident,
self.realname, self.bot)
return res.format(
self.host, self.port, self.nick, self.ident, self.realname, self.bot
)


def __str__(self): def __str__(self):
"""Return a nice string representation of the Watcher.""" """Return a nice string representation of the Watcher."""
@@ -88,17 +97,11 @@ class Watcher(IRCConnection):
self._process_hook = lambda bot, rc: () self._process_hook = lambda bot, rc: ()


path = self.bot.config.root_dir path = self.bot.config.root_dir
try:
f, path, desc = imp.find_module("rules", [path])
except ImportError:
spec = importlib.machinery.PathFinder.find_spec("rules", [path])
if spec is None or spec.loader is None:
return return
try:
module = imp.load_module("rules", f, path, desc)
except Exception:
return
finally:
f.close()

module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module)
self._process_hook_module = module self._process_hook_module = module
try: try:
self._process_hook = module.process self._process_hook = module.process


+ 8
- 7
earwigbot/lazy.py View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# Copyright (C) 2009-2015 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com>
# #
# Permission is hereby granted, free of charge, to any person obtaining a copy # Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal # of this software and associated documentation files (the "Software"), to deal
@@ -27,7 +27,6 @@ manner, so that they can be referred to by name but are not actually loaded
until they are used (i.e. their attributes are read or modified). until they are used (i.e. their attributes are read or modified).
""" """


from imp import acquire_lock, release_lock
import importlib import importlib
import sys import sys
from threading import RLock from threading import RLock
@@ -36,12 +35,16 @@ from types import ModuleType
__all__ = ["LazyImporter"] __all__ = ["LazyImporter"]


_real_get = ModuleType.__getattribute__ _real_get = ModuleType.__getattribute__
_lazy_init_lock = RLock()



def _create_failing_get(exc): def _create_failing_get(exc):
def _fail(self, attr): def _fail(self, attr):
raise exc raise exc

return _fail return _fail



def _mock_get(self, attr): def _mock_get(self, attr):
with _real_get(self, "_lock"): with _real_get(self, "_lock"):
if _real_get(self, "_unloaded"): if _real_get(self, "_unloaded"):
@@ -59,14 +62,13 @@ def _mock_get(self, attr):


class _LazyModule(type): class _LazyModule(type):
def __new__(cls, name): def __new__(cls, name):
acquire_lock()
try:
with _lazy_init_lock:
if name not in sys.modules: if name not in sys.modules:
attributes = { attributes = {
"__name__": name, "__name__": name,
"__getattribute__": _mock_get, "__getattribute__": _mock_get,
"_unloaded": True, "_unloaded": True,
"_lock": RLock()
"_lock": RLock(),
} }
parents = (ModuleType,) parents = (ModuleType,)
klass = type.__new__(cls, "module", parents, attributes) klass = type.__new__(cls, "module", parents, attributes)
@@ -74,8 +76,6 @@ class _LazyModule(type):
if "." in name: # Also ensure the parent exists if "." in name: # Also ensure the parent exists
_LazyModule(name.rsplit(".", 1)[0]) _LazyModule(name.rsplit(".", 1)[0])
return sys.modules[name] return sys.modules[name]
finally:
release_lock()




class LazyImporter: class LazyImporter:
@@ -84,6 +84,7 @@ class LazyImporter:
This inserts itself into :py:data:`sys.meta_path`, storing a dictionary of This inserts itself into :py:data:`sys.meta_path`, storing a dictionary of
:py:class:`_LazyModule`\ s (which is added to with :py:meth:`new`). :py:class:`_LazyModule`\ s (which is added to with :py:meth:`new`).
""" """

def __init__(self): def __init__(self):
self._modules = {} self._modules = {}
sys.meta_path.append(self) sys.meta_path.append(self)


+ 24
- 18
earwigbot/managers.py View File

@@ -1,7 +1,7 @@
#! /usr/bin/env python #! /usr/bin/env python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# Copyright (C) 2009-2015 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com>
# #
# Permission is hereby granted, free of charge, to any person obtaining a copy # Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal # of this software and associated documentation files (the "Software"), to deal
@@ -21,7 +21,8 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE. # SOFTWARE.


import imp
import importlib.machinery
import importlib.util
from os import listdir, path from os import listdir, path
from re import sub from re import sub
from threading import RLock, Thread from threading import RLock, Thread
@@ -32,6 +33,7 @@ from earwigbot.tasks import Task


__all__ = ["CommandManager", "TaskManager"] __all__ = ["CommandManager", "TaskManager"]



class _ResourceManager: class _ResourceManager:
""" """
**EarwigBot: Resource Manager** **EarwigBot: Resource Manager**
@@ -48,6 +50,7 @@ class _ResourceManager:
:py:meth:`load`, retrieving specific resources via :py:meth:`get`, and :py:meth:`load`, retrieving specific resources via :py:meth:`get`, and
iterating over all resources via :py:meth:`__iter__`. iterating over all resources via :py:meth:`__iter__`.
""" """

def __init__(self, bot, name, base): def __init__(self, bot, name, base):
self.bot = bot self.bot = bot
self.logger = bot.logger.getChild(name) self.logger = bot.logger.getChild(name)
@@ -60,8 +63,9 @@ class _ResourceManager:
def __repr__(self): def __repr__(self):
"""Return the canonical string representation of the manager.""" """Return the canonical string representation of the manager."""
res = "{0}(bot={1!r}, name={2!r}, base={3!r})" res = "{0}(bot={1!r}, name={2!r}, base={3!r})"
return res.format(self.__class__.__name__, self.bot,
self._resource_name, self._resource_base)
return res.format(
self.__class__.__name__, self.bot, self._resource_name, self._resource_base
)


def __str__(self): def __str__(self):
"""Return a nice string representation of the manager.""" """Return a nice string representation of the manager."""
@@ -100,22 +104,22 @@ class _ResourceManager:
def _load_module(self, name, path): def _load_module(self, name, path):
"""Load a specific resource from a module, identified by name and path. """Load a specific resource from a module, identified by name and path.


We'll first try to import it using imp magic, and if that works, make
instances of any classes inside that are subclasses of the base
We'll first try to import it using importlib magic, and if that works,
make instances of any classes inside that are subclasses of the base
(:py:attr:`self._resource_base <_resource_base>`), add them to the (:py:attr:`self._resource_base <_resource_base>`), add them to the
resources dictionary with :py:meth:`self._load_resource() resources dictionary with :py:meth:`self._load_resource()
<_load_resource>`, and finally log the addition. Any problems along <_load_resource>`, and finally log the addition. Any problems along
the way will either be ignored or logged. the way will either be ignored or logged.
""" """
f, path, desc = imp.find_module(name, [path])
spec = importlib.machinery.PathFinder.find_spec(name, [path])
try: try:
module = imp.load_module(name, f, path, desc)
assert spec is not None, "Spec must not be None"
assert spec.loader is not None, "Loader must not be None"
module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module)
except Exception: except Exception:
e = "Couldn't load module '{0}' (from {1})"
self.logger.exception(e.format(name, path))
self.logger.exception(f"Couldn't load module {name!r} (from {path})")
return return
finally:
f.close()


for obj in vars(module).values(): for obj in vars(module).values():
if type(obj) is type: if type(obj) is type:
@@ -132,7 +136,7 @@ class _ResourceManager:
continue continue
if name.startswith("_") or name.startswith("."): if name.startswith("_") or name.startswith("."):
continue continue
modname = sub("\.pyc?$", "", name) # Remove extension
modname = sub(r"\.pyc?$", "", name) # Remove extension
if modname in processed: if modname in processed:
continue continue
processed.append(modname) processed.append(modname)
@@ -200,6 +204,7 @@ class CommandManager(_ResourceManager):
""" """
Manages (i.e., loads, reloads, and calls) IRC commands. Manages (i.e., loads, reloads, and calls) IRC commands.
""" """

def __init__(self, bot): def __init__(self, bot):
super().__init__(bot, "commands", Command) super().__init__(bot, "commands", Command)


@@ -234,8 +239,7 @@ class CommandManager(_ResourceManager):


for command in self: for command in self:
if hook in command.hooks and self._wrap_check(command, data): if hook in command.hooks and self._wrap_check(command, data):
thread = Thread(target=self._wrap_process,
args=(command, data))
thread = Thread(target=self._wrap_process, args=(command, data))
start_time = strftime("%b %d %H:%M:%S") start_time = strftime("%b %d %H:%M:%S")
thread.name = "irc:{0} ({1})".format(command.name, start_time) thread.name = "irc:{0} ({1})".format(command.name, start_time)
thread.daemon = True thread.daemon = True
@@ -247,6 +251,7 @@ class TaskManager(_ResourceManager):
""" """
Manages (i.e., loads, reloads, schedules, and runs) wiki bot tasks. Manages (i.e., loads, reloads, schedules, and runs) wiki bot tasks.
""" """

def __init__(self, bot): def __init__(self, bot):
super().__init__(bot, "tasks", Task) super().__init__(bot, "tasks", Task)


@@ -292,11 +297,12 @@ class TaskManager(_ResourceManager):
if not now: if not now:
now = gmtime() now = gmtime()
# Get list of tasks to run this turn: # Get list of tasks to run this turn:
tasks = self.bot.config.schedule(now.tm_min, now.tm_hour, now.tm_mday,
now.tm_mon, now.tm_wday)
tasks = self.bot.config.schedule(
now.tm_min, now.tm_hour, now.tm_mday, now.tm_mon, now.tm_wday
)


for task in tasks: for task in tasks:
if isinstance(task, list): # They've specified kwargs,
if isinstance(task, list): # They've specified kwargs,
self.start(task[0], **task[1]) # so pass those to start self.start(task[0], **task[1]) # so pass those to start
else: # Otherwise, just pass task_name else: # Otherwise, just pass task_name
self.start(task) self.start(task)

+ 1
- 1
earwigbot/tasks/__init__.py View File

@@ -146,7 +146,7 @@ class Task:
try: try:
content = page.get() content = page.get()
except exceptions.PageNotFoundError: except exceptions.PageNotFoundError:
return False
return True
if content == cfg.get("disabled", "run"): if content == cfg.get("disabled", "run"):
return False return False




+ 58
- 30
earwigbot/tasks/wikiproject_tagger.py View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# Copyright (C) 2009-2017 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com>
# #
# Permission is hereby granted, free of charge, to any person obtaining a copy # Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal # of this software and associated documentation files (the "Software"), to deal
@@ -26,6 +26,7 @@ from earwigbot import exceptions
from earwigbot.tasks import Task from earwigbot.tasks import Task
from earwigbot.wiki import constants from earwigbot.wiki import constants



class WikiProjectTagger(Task): class WikiProjectTagger(Task):
"""A task to tag talk pages with WikiProject banners. """A task to tag talk pages with WikiProject banners.


@@ -76,28 +77,24 @@ class WikiProjectTagger(Task):
edited edited


""" """

name = "wikiproject_tagger" name = "wikiproject_tagger"


# Regexes for template names that should always go above the banner, based # Regexes for template names that should always go above the banner, based
# on [[Wikipedia:Talk page layout]]: # on [[Wikipedia:Talk page layout]]:
TOP_TEMPS = [ TOP_TEMPS = [
r"skip ?to ?(toc|talk|toctalk)$", r"skip ?to ?(toc|talk|toctalk)$",

r"ga ?nominee$", r"ga ?nominee$",

r"(user ?)?talk ?(header|page|page ?header)$", r"(user ?)?talk ?(header|page|page ?header)$",

r"community ?article ?probation$", r"community ?article ?probation$",
r"censor(-nudity)?$", r"censor(-nudity)?$",
r"blp(o| ?others?)?$", r"blp(o| ?others?)?$",
r"controvers(ial2?|y)$", r"controvers(ial2?|y)$",

r"(not ?(a ?)?)?forum$", r"(not ?(a ?)?)?forum$",
r"tv(episode|series)talk$", r"tv(episode|series)talk$",
r"recurring ?themes$", r"recurring ?themes$",
r"faq$", r"faq$",
r"(round ?in ?)?circ(les|ular)$", r"(round ?in ?)?circ(les|ular)$",

r"ar(ti|it)cle ?(history|milestones)$", r"ar(ti|it)cle ?(history|milestones)$",
r"failed ?ga$", r"failed ?ga$",
r"old ?prod( ?full)?$", r"old ?prod( ?full)?$",
@@ -144,10 +141,18 @@ class WikiProjectTagger(Task):
else: else:
only_with = None only_with = None


job = _Job(banner=banner, names=names, summary=summary, update=update,
append=append, autoassess=autoassess, only_with=only_with,
nocreate=nocreate, tag_categories=tag_categories,
dry_run=dry_run)
job = _Job(
banner=banner,
names=names,
summary=summary,
update=update,
append=append,
autoassess=autoassess,
only_with=only_with,
nocreate=nocreate,
tag_categories=tag_categories,
dry_run=dry_run,
)


try: try:
self.run_job(kwargs, site, job, recursive) self.run_job(kwargs, site, job, recursive)
@@ -165,7 +170,6 @@ class WikiProjectTagger(Task):
with open(kwargs["file"], "r") as fileobj: with open(kwargs["file"], "r") as fileobj:
for line in fileobj: for line in fileobj:
if line.strip(): if line.strip():
line = line.decode("utf8")
if line.startswith("[[") and line.endswith("]]"): if line.startswith("[[") and line.endswith("]]"):
line = line[2:-2] line = line[2:-2]
page = site.get_page(line) page = site.get_page(line)
@@ -201,8 +205,13 @@ class WikiProjectTagger(Task):
return banner, None return banner, None


names = {banner, title} names = {banner, title}
result = site.api_query(action="query", list="backlinks", bllimit=500,
blfilterredir="redirects", bltitle=title)
result = site.api_query(
action="query",
list="backlinks",
bllimit=500,
blfilterredir="redirects",
bltitle=title,
)
for backlink in result["query"]["backlinks"]: for backlink in result["query"]["backlinks"]:
names.add(backlink["title"]) names.add(backlink["title"])
if backlink["ns"] == constants.NS_TEMPLATE: if backlink["ns"] == constants.NS_TEMPLATE:
@@ -215,8 +224,9 @@ class WikiProjectTagger(Task):
def process_category(self, page, job, recursive): def process_category(self, page, job, recursive):
"""Try to tag all pages in the given category.""" """Try to tag all pages in the given category."""
if page.title in job.processed_cats: if page.title in job.processed_cats:
self.logger.debug("Skipping category, already processed: [[%s]]",
page.title)
self.logger.debug(
"Skipping category, already processed: [[%s]]", page.title
)
return return
self.logger.info("Processing category: [[%s]]", page.title) self.logger.info("Processing category: [[%s]]", page.title)
job.processed_cats.add(page.title) job.processed_cats.add(page.title)
@@ -243,8 +253,7 @@ class WikiProjectTagger(Task):
page = page.toggle_talk() page = page.toggle_talk()


if page.title in job.processed_pages: if page.title in job.processed_pages:
self.logger.debug("Skipping page, already processed: [[%s]]",
page.title)
self.logger.debug("Skipping page, already processed: [[%s]]", page.title)
return return
job.processed_pages.add(page.title) job.processed_pages.add(page.title)


@@ -275,21 +284,22 @@ class WikiProjectTagger(Task):
return return


if job.only_with: if job.only_with:
if not any(template.name.matches(job.only_with)
for template in code.ifilter_templates(recursive=True)):
if not any(
template.name.matches(job.only_with)
for template in code.ifilter_templates(recursive=True)
):
log = "Skipping page: [[%s]]; fails only-with condition" log = "Skipping page: [[%s]]; fails only-with condition"
self.logger.info(log, page.title) self.logger.info(log, page.title)
return return


if is_update: if is_update:
old_banner = str(banner)
self.update_banner(banner, job, code)
if banner == old_banner:
updated = self.update_banner(banner, job, code)
if not updated:
log = "Skipping page: [[%s]]; already tagged and no updates" log = "Skipping page: [[%s]]; already tagged and no updates"
self.logger.info(log, page.title) self.logger.info(log, page.title)
return return
self.logger.info("Updating banner on page: [[%s]]", page.title) self.logger.info("Updating banner on page: [[%s]]", page.title)
banner = banner.encode("utf8")
banner = str(banner)
else: else:
self.logger.info("Tagging page: [[%s]]", page.title) self.logger.info("Tagging page: [[%s]]", page.title)
banner = self.make_banner(job, code) banner = self.make_banner(job, code)
@@ -334,9 +344,11 @@ class WikiProjectTagger(Task):


def update_banner(self, banner, job, code): def update_banner(self, banner, job, code):
"""Update an existing *banner* based on a *job* and a page's *code*.""" """Update an existing *banner* based on a *job* and a page's *code*."""
has = lambda key: (banner.has(key) and
banner.get(key).value.strip() not in ("", "?"))
has = lambda key: (
banner.has(key) and banner.get(key).value.strip() not in ("", "?")
)


updated = False
if job.autoassess is not False: if job.autoassess is not False:
if not has("class"): if not has("class"):
assess, reason = self.get_autoassessment(code, job.autoassess) assess, reason = self.get_autoassessment(code, job.autoassess)
@@ -349,6 +361,8 @@ class WikiProjectTagger(Task):
key, value = param.split("=", 1) key, value = param.split("=", 1)
if not has(key): if not has(key):
banner.add(key, value) banner.add(key, value)
updated = True
return updated


def get_autoassessment(self, code, only_classes=None): def get_autoassessment(self, code, only_classes=None):
"""Get an autoassessment for a page. """Get an autoassessment for a page.
@@ -356,16 +370,27 @@ class WikiProjectTagger(Task):
Return (assessed class as a string or None, assessment reason or None). Return (assessed class as a string or None, assessment reason or None).
""" """
if only_classes is None or only_classes is True: if only_classes is None or only_classes is True:
classnames = ["a", "b", "book", "c", "dab", "fa", "fl", "ga",
"list", "redirect", "start", "stub"]
classnames = [
"a",
"b",
"book",
"c",
"dab",
"fa",
"fl",
"ga",
"list",
"redirect",
"start",
"stub",
]
else: else:
classnames = [klass.strip().lower()
for klass in only_classes.split(",")]
classnames = [klass.strip().lower() for klass in only_classes.split(",")]


classes = {klass: 0 for klass in classnames} classes = {klass: 0 for klass in classnames}
for template in code.ifilter_templates(recursive=True): for template in code.ifilter_templates(recursive=True):
if template.has("class"): if template.has("class"):
value = str(template.get("class").value).lower()
value = str(template.get("class").value).strip().lower()
if value in classes: if value in classes:
classes[value] += 1 classes[value] += 1


@@ -429,6 +454,7 @@ class WikiProjectTagger(Task):
self.logger.debug("Inserting banner at beginning") self.logger.debug("Inserting banner at beginning")
code.insert(0, banner + "\n") code.insert(0, banner + "\n")



class _Job: class _Job:
"""Represents a single wikiproject-tagging task. """Represents a single wikiproject-tagging task.


@@ -436,6 +462,7 @@ class _Job:
or not to autoassess and create new pages from scratch, and a counter of or not to autoassess and create new pages from scratch, and a counter of
the number of pages edited. the number of pages edited.
""" """

def __init__(self, **kwargs): def __init__(self, **kwargs):
self.banner = kwargs["banner"] self.banner = kwargs["banner"]
self.names = kwargs["names"] self.names = kwargs["names"]
@@ -456,4 +483,5 @@ class _Job:
class _ShutoffEnabled(Exception): class _ShutoffEnabled(Exception):
"""Raised by process_page() if shutoff is enabled. Caught by run(), which """Raised by process_page() if shutoff is enabled. Caught by run(), which
will then stop the task.""" will then stop the task."""

pass pass

+ 12
- 7
earwigbot/wiki/page.py View File

@@ -280,7 +280,7 @@ class Page(CopyvioMixIn):
self._assert_existence() self._assert_existence()


def _edit(self, params=None, text=None, summary=None, minor=None, bot=None, def _edit(self, params=None, text=None, summary=None, minor=None, bot=None,
force=None, section=None, captcha_id=None, captcha_word=None):
force=None, section=None, captcha_id=None, captcha_word=None, **kwargs):
"""Edit the page! """Edit the page!


If *params* is given, we'll use it as our API query parameters. If *params* is given, we'll use it as our API query parameters.
@@ -297,7 +297,7 @@ class Page(CopyvioMixIn):
# Build our API query string: # Build our API query string:
if not params: if not params:
params = self._build_edit_params(text, summary, minor, bot, force, params = self._build_edit_params(text, summary, minor, bot, force,
section, captcha_id, captcha_word)
section, captcha_id, captcha_word, kwargs)
else: # Make sure we have the right token: else: # Make sure we have the right token:
params["token"] = self.site.get_token() params["token"] = self.site.get_token()


@@ -320,7 +320,7 @@ class Page(CopyvioMixIn):
raise exceptions.EditError(result["edit"]) raise exceptions.EditError(result["edit"])


def _build_edit_params(self, text, summary, minor, bot, force, section, def _build_edit_params(self, text, summary, minor, bot, force, section,
captcha_id, captcha_word):
captcha_id, captcha_word, kwargs):
"""Given some keyword arguments, build an API edit query string.""" """Given some keyword arguments, build an API edit query string."""
unitxt = text.encode("utf8") if isinstance(text, str) else text unitxt = text.encode("utf8") if isinstance(text, str) else text
hashed = md5(unitxt).hexdigest() # Checksum to ensure text is correct hashed = md5(unitxt).hexdigest() # Checksum to ensure text is correct
@@ -351,6 +351,11 @@ class Page(CopyvioMixIn):
else: else:
params["recreate"] = "true" params["recreate"] = "true"


for key, val in kwargs.items():
if val is None:
params.pop(key, None)
else:
params[key] = val
return params return params


def _handle_edit_errors(self, error, params, retry=True): def _handle_edit_errors(self, error, params, retry=True):
@@ -657,7 +662,7 @@ class Page(CopyvioMixIn):
""" """
return mwparserfromhell.parse(self.get()) return mwparserfromhell.parse(self.get())


def edit(self, text, summary, minor=False, bot=True, force=False):
def edit(self, text, summary, minor=False, bot=True, force=False, **kwargs):
"""Replace the page's content or create a new page. """Replace the page's content or create a new page.


*text* is the new page content, with *summary* as the edit summary. *text* is the new page content, with *summary* as the edit summary.
@@ -670,9 +675,9 @@ class Page(CopyvioMixIn):
editing our page. Be careful with this! editing our page. Be careful with this!
""" """
self._edit(text=text, summary=summary, minor=minor, bot=bot, self._edit(text=text, summary=summary, minor=minor, bot=bot,
force=force)
force=force, **kwargs)


def add_section(self, text, title, minor=False, bot=True, force=False):
def add_section(self, text, title, minor=False, bot=True, force=False, **kwargs):
"""Add a new section to the bottom of the page. """Add a new section to the bottom of the page.


The arguments for this are the same as those for :py:meth:`edit`, but The arguments for this are the same as those for :py:meth:`edit`, but
@@ -683,7 +688,7 @@ class Page(CopyvioMixIn):
new section as content. new section as content.
""" """
self._edit(text=text, summary=title, minor=minor, bot=bot, force=force, self._edit(text=text, summary=title, minor=minor, bot=bot, force=force,
section="new")
section="new", **kwargs)


def check_exclusion(self, username=None, optouts=None): def check_exclusion(self, username=None, optouts=None):
"""Check whether or not we are allowed to edit the page. """Check whether or not we are allowed to edit the page.


+ 129
- 61
earwigbot/wiki/site.py View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# Copyright (C) 2009-2021 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com>
# #
# Permission is hereby granted, free of charge, to any person obtaining a copy # Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal # of this software and associated documentation files (the "Software"), to deal
@@ -37,10 +37,11 @@ from earwigbot.wiki.category import Category
from earwigbot.wiki.page import Page from earwigbot.wiki.page import Page
from earwigbot.wiki.user import User from earwigbot.wiki.user import User


oursql = importer.new("oursql")
pymysql = importer.new("pymysql")


__all__ = ["Site"] __all__ = ["Site"]



class Site: class Site:
""" """
**EarwigBot: Wiki Toolset: Site** **EarwigBot: Wiki Toolset: Site**
@@ -80,18 +81,41 @@ class Site:
- :py:meth:`get_user`: returns a User object for the given name - :py:meth:`get_user`: returns a User object for the given name
- :py:meth:`delegate`: controls when the API or SQL is used - :py:meth:`delegate`: controls when the API or SQL is used
""" """

SERVICE_API = 1 SERVICE_API = 1
SERVICE_SQL = 2 SERVICE_SQL = 2
SPECIAL_TOKENS = ["createaccount", "deleteglobalaccount", "login",
"patrol", "rollback", "setglobalaccountstatus",
"userrights", "watch"]

def __init__(self, name=None, project=None, lang=None, base_url=None,
article_path=None, script_path=None, sql=None,
namespaces=None, login=(None, None), oauth=None,
cookiejar=None, user_agent=None, use_https=True,
assert_edit=None, maxlag=None, wait_between_queries=1,
logger=None, search_config=None):
SPECIAL_TOKENS = [
"createaccount",
"deleteglobalaccount",
"login",
"patrol",
"rollback",
"setglobalaccountstatus",
"userrights",
"watch",
]

def __init__(
self,
name=None,
project=None,
lang=None,
base_url=None,
article_path=None,
script_path=None,
sql=None,
namespaces=None,
login=(None, None),
oauth=None,
cookiejar=None,
user_agent=None,
use_https=True,
assert_edit=None,
maxlag=None,
wait_between_queries=1,
logger=None,
search_config=None,
):
"""Constructor for new Site instances. """Constructor for new Site instances.


This probably isn't necessary to call yourself unless you're building a This probably isn't necessary to call yourself unless you're building a
@@ -160,8 +184,11 @@ class Site:
self._session.headers["User-Agent"] = user_agent self._session.headers["User-Agent"] = user_agent
if oauth: if oauth:
self._session.auth = OAuth1( self._session.auth = OAuth1(
oauth["consumer_token"], oauth["consumer_secret"],
oauth["access_token"], oauth["access_secret"])
oauth["consumer_token"],
oauth["consumer_secret"],
oauth["access_token"],
oauth["access_secret"],
)


# Set up our internal logger: # Set up our internal logger:
if logger: if logger:
@@ -182,13 +209,24 @@ class Site:


def __repr__(self): def __repr__(self):
"""Return the canonical string representation of the Site.""" """Return the canonical string representation of the Site."""
res = ", ".join((
"Site(name={_name!r}", "project={_project!r}", "lang={_lang!r}",
"base_url={_base_url!r}", "article_path={_article_path!r}",
"script_path={_script_path!r}", "use_https={_use_https!r}",
"assert_edit={_assert_edit!r}", "maxlag={_maxlag!r}",
"sql={_sql_data!r}", "login={0}", "oauth={1}", "user_agent={3!r}",
"cookiejar={2})"))
res = ", ".join(
(
"Site(name={_name!r}",
"project={_project!r}",
"lang={_lang!r}",
"base_url={_base_url!r}",
"article_path={_article_path!r}",
"script_path={_script_path!r}",
"use_https={_use_https!r}",
"assert_edit={_assert_edit!r}",
"maxlag={_maxlag!r}",
"sql={_sql_data!r}",
"login={0}",
"oauth={1}",
"user_agent={3!r}",
"cookiejar={2})",
)
)
name, password = self._login_info name, password = self._login_info
login = "({0}, {1})".format(repr(name), "hidden" if password else None) login = "({0}, {1})".format(repr(name), "hidden" if password else None)
oauth = "hidden" if self._oauth else None oauth = "hidden" if self._oauth else None
@@ -211,8 +249,15 @@ class Site:
return value return value
return str(value, encoding) return str(value, encoding)


def _api_query(self, params, tries=0, wait=5, ignore_maxlag=False,
no_assert=False, ae_retry=True):
def _api_query(
self,
params,
tries=0,
wait=5,
ignore_maxlag=False,
no_assert=False,
ae_retry=True,
):
"""Do an API query with *params* as a dict of parameters. """Do an API query with *params* as a dict of parameters.


See the documentation for :py:meth:`api_query` for full implementation See the documentation for :py:meth:`api_query` for full implementation
@@ -348,8 +393,14 @@ class Site:
""" """
# All attributes to be loaded, except _namespaces, which is a special # All attributes to be loaded, except _namespaces, which is a special
# case because it requires additional params in the API query: # case because it requires additional params in the API query:
attrs = [self._name, self._project, self._lang, self._base_url,
self._article_path, self._script_path]
attrs = [
self._name,
self._project,
self._lang,
self._base_url,
self._article_path,
self._script_path,
]


params = {"action": "query", "meta": "siteinfo", "siprop": "general"} params = {"action": "query", "meta": "siteinfo", "siprop": "general"}


@@ -359,7 +410,7 @@ class Site:
result = self._api_query(params, no_assert=True) result = self._api_query(params, no_assert=True)
self._load_namespaces(result) self._load_namespaces(result)
elif all(attrs): # Everything is already specified and we're not told elif all(attrs): # Everything is already specified and we're not told
return # to force a reload, so do nothing
return # to force a reload, so do nothing
else: # We're only loading attributes other than _namespaces else: # We're only loading attributes other than _namespaces
with self._api_lock: with self._api_lock:
result = self._api_query(params, no_assert=True) result = self._api_query(params, no_assert=True)
@@ -424,11 +475,11 @@ class Site:
(for that, we'd do self._login_info[0]), but rather to get our current (for that, we'd do self._login_info[0]), but rather to get our current
username without an unnecessary ?action=query&meta=userinfo API query. username without an unnecessary ?action=query&meta=userinfo API query.
""" """
name = ''.join((self._name, "Token"))
name = "".join((self._name, "Token"))
cookie = self._get_cookie(name, self.domain) cookie = self._get_cookie(name, self.domain)


if cookie: if cookie:
name = ''.join((self._name, "UserName"))
name = "".join((self._name, "UserName"))
user_name = self._get_cookie(name, self.domain) user_name = self._get_cookie(name, self.domain)
if user_name: if user_name:
return unquote_plus(user_name.value) return unquote_plus(user_name.value)
@@ -528,8 +579,12 @@ class Site:
except KeyError: except KeyError:
raise exceptions.LoginError("Couldn't get login token") raise exceptions.LoginError("Couldn't get login token")


params = {"action": "login", "lgname": name, "lgpassword": password,
"lgtoken": token}
params = {
"action": "login",
"lgname": name,
"lgpassword": password,
"lgtoken": token,
}
with self._api_lock: with self._api_lock:
result = self._api_query(params, no_assert=True) result = self._api_query(params, no_assert=True)


@@ -564,18 +619,22 @@ class Site:
def _sql_connect(self, **kwargs): def _sql_connect(self, **kwargs):
"""Attempt to establish a connection with this site's SQL database. """Attempt to establish a connection with this site's SQL database.


oursql.connect() will be called with self._sql_data as its kwargs.
pymysql.connect() will be called with self._sql_data as its kwargs.
Any kwargs given to this function will be passed to connect() and will Any kwargs given to this function will be passed to connect() and will
have precedence over the config file. have precedence over the config file.


Will raise SQLError() if the module "oursql" is not available. oursql
may raise its own exceptions (e.g. oursql.InterfaceError) if it cannot
Will raise SQLError() if the module "pymysql" is not available. pymysql
may raise its own exceptions (e.g. pymysql.InterfaceError) if it cannot
establish a connection. establish a connection.
""" """
args = self._sql_data args = self._sql_data
for key, value in kwargs.items(): for key, value in kwargs.items():
args[key] = value args[key] = value
if "read_default_file" not in args and "user" not in args and "passwd" not in args:
if (
"read_default_file" not in args
and "user" not in args
and "passwd" not in args
):
args["read_default_file"] = expanduser("~/.my.cnf") args["read_default_file"] = expanduser("~/.my.cnf")
elif "read_default_file" in args: elif "read_default_file" in args:
args["read_default_file"] = expanduser(args["read_default_file"]) args["read_default_file"] = expanduser(args["read_default_file"])
@@ -585,9 +644,9 @@ class Site:
args["autoreconnect"] = True args["autoreconnect"] = True


try: try:
self._sql_conn = oursql.connect(**args)
self._sql_conn = pymysql.connect(**args)
except ImportError: except ImportError:
e = "SQL querying requires the 'oursql' package: https://pythonhosted.org/oursql/"
e = "SQL querying requires the 'pymysql' package: https://pymysql.readthedocs.io/"
raise exceptions.SQLError(e) raise exceptions.SQLError(e)


def _get_service_order(self): def _get_service_order(self):
@@ -608,8 +667,11 @@ class Site:
if now - self._sql_info_cache["lastcheck"] > 120: if now - self._sql_info_cache["lastcheck"] > 120:
self._sql_info_cache["lastcheck"] = now self._sql_info_cache["lastcheck"] = now
try: try:
self._sql_info_cache["replag"] = sqllag = self.get_replag()
except (exceptions.SQLError, oursql.Error):
try:
self._sql_info_cache["replag"] = sqllag = self.get_replag()
except pymysql.Error as exc:
raise exceptions.SQLError(str(exc))
except (exceptions.SQLError, ImportError):
self._sql_info_cache["usable"] = False self._sql_info_cache["usable"] = False
return [self.SERVICE_API] return [self.SERVICE_API]
self._sql_info_cache["usable"] = True self._sql_info_cache["usable"] = True
@@ -705,24 +767,31 @@ class Site:
with self._api_lock: with self._api_lock:
return self._api_query(kwargs) return self._api_query(kwargs)


def sql_query(self, query, params=(), plain_query=False, dict_cursor=False,
cursor_class=None, show_table=False, buffsize=1024):
def sql_query(
self,
query,
params=(),
plain_query=False,
dict_cursor=False,
cursor_class=None,
buffsize=1024,
):
"""Do an SQL query and yield its results. """Do an SQL query and yield its results.


If *plain_query* is ``True``, we will force an unparameterized query. If *plain_query* is ``True``, we will force an unparameterized query.
Specifying both *params* and *plain_query* will cause an error. If Specifying both *params* and *plain_query* will cause an error. If
*dict_cursor* is ``True``, we will use :py:class:`oursql.DictCursor` as
our cursor, otherwise the default :py:class:`oursql.Cursor`. If
*cursor_class* is given, it will override this option. If *show_table*
is True, the name of the table will be prepended to the name of the
column. This will mainly affect an :py:class:`~oursql.DictCursor`.
*dict_cursor* is ``True``, we will use
:py:class:`pymysql.cursors.DictCursor` as our cursor, otherwise the
default :py:class:`pymysql.cursors.Cursor`. If *cursor_class* is given,
it will override this option.


*buffsize* is the size of each memory-buffered group of results, to *buffsize* is the size of each memory-buffered group of results, to
reduce the number of conversations with the database; it is passed to reduce the number of conversations with the database; it is passed to
:py:meth:`cursor.fetchmany() <oursql.Cursor.fetchmany>`. If set to
``0```, all results will be buffered in memory at once (this uses
:py:meth:`fetchall() <oursql.Cursor.fetchall>`). If set to ``1``, it is
equivalent to using :py:meth:`fetchone() <oursql.Cursor.fetchone>`.
:py:meth:`cursor.fetchmany() <pymysql.cursors.Cursor.fetchmany>`. If
set to ``0``, all results will be buffered in memory at once (this
uses :py:meth:`fetchall() <pymysql.cursors.Cursor.fetchall>`). If set
to ``1``, it is equivalent to using
:py:meth:`fetchone() <pymysql.cursors.Cursor.fetchone>`.


Example usage:: Example usage::


@@ -736,25 +805,25 @@ class Site:
{'user_id': 7418060, 'user_registration': '20080703215134'} {'user_id': 7418060, 'user_registration': '20080703215134'}


This may raise :py:exc:`~earwigbot.exceptions.SQLError` or one of This may raise :py:exc:`~earwigbot.exceptions.SQLError` or one of
oursql's exceptions (:py:exc:`oursql.ProgrammingError`,
:py:exc:`oursql.InterfaceError`, ...) if there were problems with the
pymysql's exceptions (:py:exc:`pymysql.ProgrammingError`,
:py:exc:`pymysql.InterfaceError`, ...) if there were problems with the
query. query.


See :py:meth:`_sql_connect` for information on how a connection is See :py:meth:`_sql_connect` for information on how a connection is
acquired. Also relevant is `oursql's documentation
<https://pythonhosted.org/oursql/>`_ for details on that package.
acquired. Also relevant is `pymysql's documentation
<https://pymysql.readthedocs.io/>`_ for details on that package.
""" """
if not cursor_class: if not cursor_class:
if dict_cursor: if dict_cursor:
cursor_class = oursql.DictCursor
cursor_class = pymysql.cursors.DictCursor
else: else:
cursor_class = oursql.Cursor
cursor_class = pymysql.cursors.Cursor
klass = cursor_class klass = cursor_class


with self._sql_lock: with self._sql_lock:
if not self._sql_conn: if not self._sql_conn:
self._sql_connect() self._sql_connect()
with self._sql_conn.cursor(klass, show_table=show_table) as cur:
with self._sql_conn.cursor(klass) as cur:
cur.execute(query, params, plain_query) cur.execute(query, params, plain_query)
if buffsize: if buffsize:
while True: while True:
@@ -798,8 +867,8 @@ class Site:
time from the timestamp of the latest recent changes event. time from the timestamp of the latest recent changes event.


This may raise :py:exc:`~earwigbot.exceptions.SQLError` or one of This may raise :py:exc:`~earwigbot.exceptions.SQLError` or one of
oursql's exceptions (:py:exc:`oursql.ProgrammingError`,
:py:exc:`oursql.InterfaceError`, ...) if there were problems.
pymysql's exceptions (:py:exc:`pymysql.ProgrammingError`,
:py:exc:`pymysql.InterfaceError`, ...) if there were problems.
""" """
query = """SELECT UNIX_TIMESTAMP() - UNIX_TIMESTAMP(rc_timestamp) FROM query = """SELECT UNIX_TIMESTAMP() - UNIX_TIMESTAMP(rc_timestamp) FROM
recentchanges ORDER BY rc_timestamp DESC LIMIT 1""" recentchanges ORDER BY rc_timestamp DESC LIMIT 1"""
@@ -886,8 +955,7 @@ class Site:
prefix = title.split(":", 1)[0] prefix = title.split(":", 1)[0]
if prefix != title: # Avoid a page that is simply "Category" if prefix != title: # Avoid a page that is simply "Category"
if prefix in prefixes: if prefix in prefixes:
return Category(self, title, follow_redirects, pageid,
self._logger)
return Category(self, title, follow_redirects, pageid, self._logger)
return Page(self, title, follow_redirects, pageid, self._logger) return Page(self, title, follow_redirects, pageid, self._logger)


def get_category(self, catname, follow_redirects=False, pageid=None): def get_category(self, catname, follow_redirects=False, pageid=None):
@@ -899,7 +967,7 @@ class Site:
""" """
catname = self._unicodeify(catname) catname = self._unicodeify(catname)
prefix = self.namespace_id_to_name(constants.NS_CATEGORY) prefix = self.namespace_id_to_name(constants.NS_CATEGORY)
pagename = ':'.join((prefix, catname))
pagename = ":".join((prefix, catname))
return Category(self, pagename, follow_redirects, pageid, self._logger) return Category(self, pagename, follow_redirects, pageid, self._logger)


def get_user(self, username=None): def get_user(self, username=None):


+ 62
- 24
earwigbot/wiki/sitesdb.py View File

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# Copyright (C) 2009-2021 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com>
# #
# Permission is hereby granted, free of charge, to any person obtaining a copy # Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal # of this software and associated documentation files (the "Software"), to deal
@@ -35,6 +35,7 @@ from earwigbot.wiki.site import Site


__all__ = ["SitesDB"] __all__ = ["SitesDB"]



class SitesDB: class SitesDB:
""" """
**EarwigBot: Wiki Toolset: Sites Database Manager** **EarwigBot: Wiki Toolset: Sites Database Manager**
@@ -106,7 +107,7 @@ class SitesDB:
# Create the file and restrict reading/writing only to the # Create the file and restrict reading/writing only to the
# owner, so others can't peek at our cookies: # owner, so others can't peek at our cookies:
open(self._cookie_file, "w").close() open(self._cookie_file, "w").close()
chmod(self._cookie_file, stat.S_IRUSR|stat.S_IWUSR)
chmod(self._cookie_file, stat.S_IRUSR | stat.S_IWUSR)
else: else:
raise raise


@@ -172,8 +173,16 @@ class SitesDB:
except KeyError: except KeyError:
namespaces[ns_id] = [ns_name] namespaces[ns_id] = [ns_name]


return (name, project, lang, base_url, article_path, script_path, sql,
namespaces)
return (
name,
project,
lang,
base_url,
article_path,
script_path,
sql,
namespaces,
)


def _make_site_object(self, name): def _make_site_object(self, name):
"""Return a Site object associated with the site *name* in our sitesdb. """Return a Site object associated with the site *name* in our sitesdb.
@@ -182,8 +191,9 @@ class SitesDB:
raised if the site is not in our sitesdb. raised if the site is not in our sitesdb.
""" """
cookiejar = self._get_cookiejar() cookiejar = self._get_cookiejar()
(name, project, lang, base_url, article_path, script_path, sql,
namespaces) = self._load_site_from_sitesdb(name)
(name, project, lang, base_url, article_path, script_path, sql, namespaces) = (
self._load_site_from_sitesdb(name)
)


config = self.config config = self.config
login = (config.wiki.get("username"), config.wiki.get("password")) login = (config.wiki.get("username"), config.wiki.get("password"))
@@ -211,13 +221,26 @@ class SitesDB:
if isinstance(value, str) and "$1" in value: if isinstance(value, str) and "$1" in value:
sql[key] = value.replace("$1", name) sql[key] = value.replace("$1", name)


return Site(name=name, project=project, lang=lang, base_url=base_url,
article_path=article_path, script_path=script_path,
sql=sql, namespaces=namespaces, login=login, oauth=oauth,
cookiejar=cookiejar, user_agent=user_agent,
use_https=use_https, assert_edit=assert_edit,
maxlag=maxlag, wait_between_queries=wait_between_queries,
logger=logger, search_config=search_config)
return Site(
name=name,
project=project,
lang=lang,
base_url=base_url,
article_path=article_path,
script_path=script_path,
sql=sql,
namespaces=namespaces,
login=login,
oauth=oauth,
cookiejar=cookiejar,
user_agent=user_agent,
use_https=use_https,
assert_edit=assert_edit,
maxlag=maxlag,
wait_between_queries=wait_between_queries,
logger=logger,
search_config=search_config,
)


def _get_site_name_from_sitesdb(self, project, lang): def _get_site_name_from_sitesdb(self, project, lang):
"""Return the name of the first site with the given project and lang. """Return the name of the first site with the given project and lang.
@@ -255,8 +278,14 @@ class SitesDB:
database. If the sitesdb doesn't exist, we'll create it first. database. If the sitesdb doesn't exist, we'll create it first.
""" """
name = site.name name = site.name
sites_data = (name, site.project, site.lang, site._base_url,
site._article_path, site._script_path)
sites_data = (
name,
site.project,
site.lang,
site._base_url,
site._article_path,
site._script_path,
)
sql_data = [(name, key, val) for key, val in site._sql_data.items()] sql_data = [(name, key, val) for key, val in site._sql_data.items()]
ns_data = [] ns_data = []
for ns_id, ns_names in site._namespaces.items(): for ns_id, ns_names in site._namespaces.items():
@@ -353,8 +382,9 @@ class SitesDB:
e = "Site '{0}:{1}' not found in the sitesdb.".format(project, lang) e = "Site '{0}:{1}' not found in the sitesdb.".format(project, lang)
raise SiteNotFoundError(e) raise SiteNotFoundError(e)


def add_site(self, project=None, lang=None, base_url=None,
script_path="/w", sql=None):
def add_site(
self, project=None, lang=None, base_url=None, script_path="/w", sql=None
):
"""Add a site to the sitesdb so it can be retrieved with get_site(). """Add a site to the sitesdb so it can be retrieved with get_site().


If only a project and a lang are given, we'll guess the *base_url* as If only a project and a lang are given, we'll guess the *base_url* as
@@ -368,8 +398,8 @@ class SitesDB:
your wiki is different, provide the script_path as an argument. SQL your wiki is different, provide the script_path as an argument. SQL
connection settings are guessed automatically using config's template connection settings are guessed automatically using config's template
value. If this is wrong or not specified, provide a dict of kwargs as value. If this is wrong or not specified, provide a dict of kwargs as
*sql* and Site will pass it to :py:func:`oursql.connect(**sql)
<oursql.connect>`, allowing you to make queries with
*sql* and Site will pass it to :py:func:`pymysql.connect(**sql)
<pymysql.connect>`, allowing you to make queries with
:py:meth:`site.sql_query <earwigbot.wiki.site.Site.sql_query>`. :py:meth:`site.sql_query <earwigbot.wiki.site.Site.sql_query>`.


Returns ``True`` if the site was added successfully or ``False`` if the Returns ``True`` if the site was added successfully or ``False`` if the
@@ -399,11 +429,19 @@ class SitesDB:
user_agent = user_agent.replace("$2", python_version()) user_agent = user_agent.replace("$2", python_version())


# Create a Site object to log in and load the other attributes: # Create a Site object to log in and load the other attributes:
site = Site(base_url=base_url, script_path=script_path, sql=sql,
login=login, oauth=oauth, cookiejar=cookiejar,
user_agent=user_agent, use_https=use_https,
assert_edit=assert_edit, maxlag=maxlag,
wait_between_queries=wait_between_queries)
site = Site(
base_url=base_url,
script_path=script_path,
sql=sql,
login=login,
oauth=oauth,
cookiejar=cookiejar,
user_agent=user_agent,
use_https=use_https,
assert_edit=assert_edit,
maxlag=maxlag,
wait_between_queries=wait_between_queries,
)


self._logger.info("Added site '{0}'".format(site.name)) self._logger.info("Added site '{0}'".format(site.name))
self._add_site_to_sitesdb(site) self._add_site_to_sitesdb(site)


+ 19
- 19
setup.py View File

@@ -1,7 +1,7 @@
#! /usr/bin/env python #! /usr/bin/env python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# Copyright (C) 2009-2021 Ben Kurtovic <ben.kurtovic@gmail.com>
# Copyright (C) 2009-2024 Ben Kurtovic <ben.kurtovic@gmail.com>
# #
# Permission is hereby granted, free of charge, to any person obtaining a copy # Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal # of this software and associated documentation files (the "Software"), to deal
@@ -37,11 +37,11 @@ extra_deps = {
"cryptography >= 3.4.7", # Storing bot passwords + keys in the config file "cryptography >= 3.4.7", # Storing bot passwords + keys in the config file
], ],
"sql": [ "sql": [
"oursql3 >= 0.9.4", # Interfacing with MediaWiki databases
"pymysql >= 1.1.0", # Interfacing with MediaWiki databases
], ],
"copyvios": [ "copyvios": [
"beautifulsoup4 >= 4.9.3", # Parsing/scraping HTML "beautifulsoup4 >= 4.9.3", # Parsing/scraping HTML
"cchardet >= 2.1.7", # Encoding detection for BeautifulSoup
"charset_normalizer >= 3.3.2", # Encoding detection for BeautifulSoup
"lxml >= 4.6.3", # Faster parser for BeautifulSoup "lxml >= 4.6.3", # Faster parser for BeautifulSoup
"nltk >= 3.6.1", # Parsing sentences to split article content "nltk >= 3.6.1", # Parsing sentences to split article content
"pdfminer >= 20191125", # Extracting text from PDF files "pdfminer >= 20191125", # Extracting text from PDF files
@@ -58,21 +58,21 @@ with open("README.rst") as fp:
long_docs = fp.read() long_docs = fp.read()


setup( setup(
name = "earwigbot",
packages = find_packages(exclude=("tests",)),
entry_points = {"console_scripts": ["earwigbot = earwigbot.util:main"]},
install_requires = dependencies,
test_suite = "tests",
version = __version__,
author = "Ben Kurtovic",
author_email = "ben.kurtovic@gmail.com",
url = "https://github.com/earwig/earwigbot",
description = "EarwigBot is a Python robot that edits Wikipedia and interacts with people over IRC.",
long_description = long_docs,
download_url = "https://github.com/earwig/earwigbot/tarball/v{0}".format(__version__),
keywords = "earwig earwigbot irc wikipedia wiki mediawiki",
license = "MIT License",
classifiers = [
name="earwigbot",
packages=find_packages(exclude=("tests",)),
entry_points={"console_scripts": ["earwigbot = earwigbot.util:main"]},
install_requires=dependencies,
test_suite="tests",
version=__version__,
author="Ben Kurtovic",
author_email="ben.kurtovic@gmail.com",
url="https://github.com/earwig/earwigbot",
description="EarwigBot is a Python robot that edits Wikipedia and interacts with people over IRC.",
long_description=long_docs,
download_url="https://github.com/earwig/earwigbot/tarball/v{0}".format(__version__),
keywords="earwig earwigbot irc wikipedia wiki mediawiki",
license="MIT License",
classifiers=[
"Development Status :: 3 - Alpha", "Development Status :: 3 - Alpha",
"Environment :: Console", "Environment :: Console",
"Intended Audience :: Developers", "Intended Audience :: Developers",
@@ -81,6 +81,6 @@ setup(
"Operating System :: OS Independent", "Operating System :: OS Independent",
"Programming Language :: Python :: 3", "Programming Language :: Python :: 3",
"Topic :: Communications :: Chat :: Internet Relay Chat", "Topic :: Communications :: Chat :: Internet Relay Chat",
"Topic :: Internet :: WWW/HTTP"
"Topic :: Internet :: WWW/HTTP",
], ],
) )

Loading…
Cancel
Save